import ast
import json
import time

import gradio as gr
import pandas as pd
import requests


def agi_answer(endpoint, instruction, question, temp, top_p, top_k, beams, max_tokens):
    """Send one prompt to a remote Gradio app and return its first output.

    Posts to ``{endpoint}/run/predict`` with the arguments packed, in order,
    into the ``data`` list of the JSON payload.

    Args:
        endpoint: Base URL of the remote app (no trailing ``/run/predict``).
        instruction: First positional input forwarded to the remote app.
        question: Second positional input forwarded to the remote app.
        temp: Sampling temperature forwarded to the remote app.
        top_p: ``top p`` value forwarded to the remote app.
        top_k: ``top k`` value forwarded to the remote app.
        beams: Beam count forwarded to the remote app.
        max_tokens: Token limit forwarded to the remote app.

    Returns:
        The first element of the ``data`` list in the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    response = requests.post(
        f'{endpoint}/run/predict',
        json={
            'data': [
                instruction,
                question,
                temp,
                top_p,
                top_k,
                beams,
                max_tokens,
            ]
        },
    )
    # Fail with the real HTTP error instead of a confusing KeyError/JSON
    # decode error further down when the server rejects the request.
    response.raise_for_status()
    return response.json()['data'][0]


def eval_agi(endpoint, temp, top_p, top_k, beams, max_tokens):
    """Run every row of ``mmlu_testdf.csv`` through the endpoint and score it.

    Each cell of the ``input`` column is parsed with ``ast.literal_eval``; the
    ``'content'`` of its element 0 is sent as the instruction and the
    ``'content'`` of its element 1 as the question. The first character of
    each reply is stored in ``Answer_AGI`` (the full reply in
    ``Answer_AGI_raw``) and compared with the ``ideal`` column.

    Args:
        endpoint: Base URL of the remote app to evaluate.
        temp: Sampling temperature forwarded with every request.
        top_p: ``top p`` value forwarded with every request.
        top_k: ``top k`` value forwarded with every request.
        beams: Beam count forwarded with every request.
        max_tokens: Token limit forwarded with every request.

    Returns:
        A two-element list: the fraction of rows where ``ideal`` equals
        ``Answer_AGI`` (``0.0`` when the file has no rows), and a DataFrame
        holding the ``ideal`` and ``Answer_AGI`` columns.
    """
    test_df = pd.read_csv('mmlu_testdf.csv')
    total = len(test_df)

    short_answers = []
    raw_answers = []
    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for index, value in test_df['input'].items():
        # Parse the cell once; both messages come from the same literal.
        messages = ast.literal_eval(value)
        ans = agi_answer(
            endpoint,
            messages[0]['content'],
            messages[1]['content'],
            temp,
            top_p,
            top_k,
            beams,
            max_tokens,
        )
        short_answers.append(ans[:1])
        raw_answers.append(ans)
        print(index, '/', total)
        # Brief pause between consecutive requests.
        time.sleep(0.001)

    # Assigning the columns after the loop means they exist even when the
    # file has no rows, so the comparison below cannot raise KeyError.
    test_df['Answer_AGI'] = short_answers
    test_df['Answer_AGI_raw'] = raw_answers

    matches = (test_df['ideal'] == test_df['Answer_AGI']).sum()
    accuracy = matches / total if total else 0.0
    return [accuracy, test_df[['ideal', 'Answer_AGI']]]


# NOTE(review): gr.inputs / gr.outputs are Gradio's legacy component
# namespaces; newer Gradio releases appear to have dropped them -- confirm the
# pinned Gradio version before upgrading.
demo = gr.Interface(
    fn=eval_agi,
    inputs=[
        gr.inputs.Textbox(default='https://191779ad955db5c67f.gradio.live', label='endpoint'),
        gr.inputs.Slider(0, 1, label='temperature', default=0.1),
        gr.inputs.Slider(0, 1, default=0.75, label='top p'),
        gr.inputs.Slider(0, 100, default=40, label='top k'),
        gr.inputs.Slider(0, 4, default=4, label='beams'),
        gr.inputs.Slider(0, 2000, default=128, label='max tokens'),
    ],
    outputs=[
        gr.outputs.Label(label="Accuracy"),
        'dataframe',
    ],
)

demo.launch()