""" | |
It provides a platform for comparing the responses of two LLMs. | |
""" | |
import enum
from uuid import uuid4

from firebase_admin import firestore
import gradio as gr
import lingua

from db import db
from leaderboard import build_leaderboard
from leaderboard import SUPPORTED_LANGUAGES
from model import check_models
from model import supported_models
from rate_limit import set_token
import response
from response import get_responses
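
# Language detector used to tag the language of summarization responses
# before they are stored (see vote()).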
detector = lingua.LanguageDetectorBuilder.from_all_languages().build()


class VoteOptions(enum.Enum):
  MODEL_A = "Model A is better"
  MODEL_B = "Model B is better"
  TIE = "Tie"


def vote(vote_button, response_a, response_b, model_a_name, model_b_name,
         prompt, instruction, category, source_lang, target_lang):
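  """Saves the pair of responses and the user's vote to Firestore.

  Returns updates that disable the three vote buttons and reveal the row
  showing which model produced each response.
  """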
  doc_id = uuid4().hex
  winner = VoteOptions(vote_button).name.lower()
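
  # Once a vote is cast, disable all three vote buttons and reveal the row
  # that shows which model produced each response.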
  deactivated_buttons = [gr.Button(interactive=False) for _ in range(3)]
  outputs = deactivated_buttons + [gr.Row(visible=True)]

  doc = {
      "id": doc_id,
      "prompt": prompt,
      "instruction": instruction,
      "model_a": model_a_name,
      "model_b": model_b_name,
      "model_a_response": response_a,
      "model_b_response": response_b,
      "winner": winner,
      "timestamp": firestore.SERVER_TIMESTAMP
  }
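
  # Summaries have no user-selected output language, so detect the language
  # of each response before saving.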
  if category == response.Category.SUMMARIZE.value:
    language_a = detector.detect_language_of(response_a)
    language_b = detector.detect_language_of(response_b)

    # TODO(#37): Move DB operations to db.py.
    doc_ref = db.collection("arena-summarizations").document(doc_id)
    doc["model_a_response_language"] = language_a.name.lower()
    doc["model_b_response_language"] = language_b.name.lower()

    doc_ref.set(doc)
    return outputs

  if category == response.Category.TRANSLATE.value:
    if not source_lang or not target_lang:
      raise gr.Error("Please select source and target languages.")

    doc_ref = db.collection("arena-translations").document(doc_id)
    doc["source_language"] = source_lang.lower()
    doc["target_language"] = target_lang.lower()

    doc_ref.set(doc)
    return outputs

  raise gr.Error("Please select a category.")


# Removes the persistent orange border from the leaderboard, which
# appears due to the 'generating' class when using the 'every' parameter.
css = """
.leaderboard .generating {
  border: none;
}
"""

with gr.Blocks(title="Yanolja Arena", css=css) as app:
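  # Hidden textbox managed by rate_limit.set_token; its value is passed to
  # get_responses with each request (presumably for rate limiting).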
  token = gr.Textbox(visible=False)
  set_token(app, token)

  with gr.Row():
    gr.HTML("""
      <h1 style="text-align: center; font-size: 28px; margin-bottom: 16px">Yanolja Arena</h1>
      <p style="text-align: center; font-size: 16px">Yanolja Arena helps find the best LLMs for summarizing and translating text. We compare two random models at a time and use an Elo rating system to score them.</p>
      <p style="text-align: center; font-size: 16px">This is an open-source project. Check it out on <a href="https://github.com/yanolja/arena">GitHub</a>.</p>
    """)
with gr.Accordion("How to Use", open=False): | |
gr.Markdown(""" | |
1. **For Summaries:** | |
- Enter the text you want summarized into the prompt box. | |
2. **For Translations:** | |
- Choose the language you're translating from and to. | |
- Enter the text you want translated into the prompt box. | |
3. **Voting:** | |
- After you see both results, pick which one you think is better. | |
""") | |
with gr.Accordion("Available Models", open=False): | |
gr.Markdown("\n".join([f"- {model.name}" for model in supported_models])) | |

  with gr.Row():
    category_radio = gr.Radio(
        choices=[category.value for category in response.Category],
        value=response.Category.SUMMARIZE.value,
        label="Category",
        info="The chosen category determines the instruction sent to the LLMs.")

    source_language = gr.Dropdown(
        choices=SUPPORTED_LANGUAGES,
        value=lingua.Language.ENGLISH.name.capitalize(),
        label="Source language",
        info="Choose the source language for translation.",
        interactive=True,
        visible=False)
    target_language = gr.Dropdown(
        choices=SUPPORTED_LANGUAGES,
        value=lingua.Language.KOREAN.name.capitalize(),
        label="Target language",
        info="Choose the target language for translation.",
        interactive=True,
        visible=False)

  def update_language_visibility(category):
    visible = category == response.Category.TRANSLATE.value
    return {
        source_language: gr.Dropdown(visible=visible),
        target_language: gr.Dropdown(visible=visible)
    }

  category_radio.change(update_language_visibility, category_radio,
                        [source_language, target_language])
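
  # Placeholder State components; they are reassigned to the actual Textbox
  # components inside the layout below.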
  model_names = [gr.State(None), gr.State(None)]
  response_boxes = [gr.State(None), gr.State(None)]
  prompt_textarea = gr.TextArea(label="Prompt", lines=4)
  submit = gr.Button()

  with gr.Group():
    with gr.Row():
      response_boxes[0] = gr.Textbox(label="Model A", interactive=False)
      response_boxes[1] = gr.Textbox(label="Model B", interactive=False)

    with gr.Row(visible=False) as model_name_row:
      model_names[0] = gr.Textbox(show_label=False)
      model_names[1] = gr.Textbox(show_label=False)

  with gr.Row(visible=False) as vote_row:
    option_a = gr.Button(VoteOptions.MODEL_A.value)
    option_b = gr.Button(VoteOptions.MODEL_B.value)
    tie = gr.Button(VoteOptions.TIE.value)

  instruction_state = gr.State("")

  # The following elements need to be reset when the user changes
  # the category, source language, or target language.
  ui_elements = [
      response_boxes[0], response_boxes[1], model_names[0], model_names[1],
      instruction_state, model_name_row, vote_row
  ]

  def reset_ui():
    return [gr.Textbox(value="") for _ in range(4)] + [
        gr.State(""),
        gr.Row(visible=False),
        gr.Row(visible=False)
    ]

  category_radio.change(fn=reset_ui, outputs=ui_elements)
  source_language.change(fn=reset_ui, outputs=ui_elements)
  target_language.change(fn=reset_ui, outputs=ui_elements)
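
  # Lock the category, language, and submit controls while the two responses
  # are being generated; they are re-enabled in the .then() handler below.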
  submit_event = submit.click(
      fn=lambda: [
          gr.Radio(interactive=False),
          gr.Dropdown(interactive=False),
          gr.Dropdown(interactive=False),
          gr.Button(interactive=False),
          gr.Row(visible=False),
          gr.Row(visible=False),
      ] + [gr.Button(interactive=True) for _ in range(3)],
      outputs=[
          category_radio, source_language, target_language, submit, vote_row,
          model_name_row, option_a, option_b, tie
      ]).then(fn=get_responses,
              inputs=[
                  prompt_textarea, category_radio, source_language,
                  target_language, token
              ],
              outputs=response_boxes + model_names + [instruction_state])

  submit_event.success(fn=lambda: gr.Row(visible=True), outputs=vote_row)

  submit_event.then(
      fn=lambda: [
          gr.Radio(interactive=True),
          gr.Dropdown(interactive=True),
          gr.Dropdown(interactive=True),
          gr.Button(interactive=True)
      ],
      outputs=[category_radio, source_language, target_language, submit])
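
  # Each vote button passes its own label as the first input so that vote()
  # can map it back to a VoteOptions member.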

  def deactivate_after_voting(option_button: gr.Button):
    option_button.click(
        fn=vote,
        inputs=[option_button] + response_boxes + model_names + [
            prompt_textarea, instruction_state, category_radio,
            source_language, target_language
        ],
        outputs=[option_a, option_b, tie, model_name_row]).then(
            fn=lambda: [gr.Button(interactive=False) for _ in range(3)],
            outputs=[option_a, option_b, tie])

  for option in [option_a, option_b, tie]:
    deactivate_after_voting(option)
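
  # Leaderboard table; it refreshes periodically, which is why the CSS
  # workaround above removes the 'generating' border.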
  build_leaderboard()

if __name__ == "__main__":
  check_models(supported_models)

  # We need to enable queue to use generators.
  app.queue(api_open=False)
  app.launch(debug=True, show_api=False)