Spaces:

yanolja
/

arena

Running

suhyun.kang committed on Feb 1, 2024

Commit

cf196b3

1 Parent(s): 5e33531

[#1] Add voting feature

Changes:
- Added response type selection.
- Included source and target language options for the "Translate" selection.
- Implemented the voting feature.

Firestore example item: https://console.firebase.google.com/u/0/project/special-tf-prod/firestore/data/~2Farena-summarizations~2F28213a8a0c1c44c295745841dabc7ad4?hl=ko

Screenshot: https://screen.yanolja.in/tCD6mJ0CpqoGDZwr.png

Files changed (3) hide show

.gitignore +1 -0
app.py +109 -0
requirments.txt +12 -0

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	venv


1	venv
2	+ *.log

app.py CHANGED Viewed

@@ -2,15 +2,70 @@
 It provides a platform for comparing the responses of two LLMs.
 """
 from random import sample
 from fastchat.serve import gradio_web_server
 from fastchat.serve.gradio_web_server import bot_response
 import gradio as gr
 # TODO(#1): Add more models.
 SUPPORTED_MODELS = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "gemini-pro"]
 def user(user_prompt):
   model_pair = sample(SUPPORTED_MODELS, 2)
@@ -85,6 +140,40 @@ def bot(state_a, state_b, request: gr.Request):
 with gr.Blocks() as app:
   model_names = [gr.State(None), gr.State(None)]
   responses = [gr.State(None), gr.State(None)]
@@ -98,6 +187,26 @@ with gr.Blocks() as app:
     responses[0] = gr.Textbox(label="Model A", interactive=False)
     responses[1] = gr.Textbox(label="Model B", interactive=False)
   with gr.Accordion("Show models", open=False):
     with gr.Row():
       model_names[0] = gr.Textbox(label="Model A", interactive=False)

 It provides a platform for comparing the responses of two LLMs.
 """
+import enum
+import json
 from random import sample
+from uuid import uuid4
 from fastchat.serve import gradio_web_server
 from fastchat.serve.gradio_web_server import bot_response
+import firebase_admin
+from firebase_admin import firestore
 import gradio as gr
+# Firebase app and Firestore client shared by the whole module; both are
+# created once, when this module is imported.
+# NOTE(review): initialize_app() is called without explicit credentials, so it
+# presumably relies on credentials provided by the environment - confirm.
+db_app = firebase_admin.initialize_app()
+db = firestore.client()
 # TODO(#1): Add more models.
 SUPPORTED_MODELS = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "gemini-pro"]
+SUPPORTED_TRANSLATION_LANGUAGES = ["Korean", "English"]
+class ResponseType(enum.Enum):
+  """Kinds of response the user can request from the models.
+
+  The member values are the labels offered by the "Response type" radio.
+  """
+  SUMMARIZE = "Summarize"
+  TRANSLATE = "Translate"
+class VoteOptions(enum.Enum):
+  """Possible outcomes of a vote.
+
+  `vote` looks a member up by its value (the clicked button's label) and
+  stores the lower-cased member name as the winner.
+  """
+  MODEL_A = "Model A is better"
+  MODEL_B = "Model B is better"
+  TIE = "Tie"
+def vote(state_a, state_b, vote_button, res_type, source_lang, target_lang):
+  winner = VoteOptions(vote_button).name.lower()
+  # The 'messages' field in the state is an array of arrays, a data type
+  # not supported by Firestore. Therefore, we convert it to a JSON string.
+  model_a_conv = json.dumps(state_a.dict())
+  model_b_conv = json.dumps(state_b.dict())
+  if res_type == ResponseType.SUMMARIZE.value:
+    doc_ref = db.collection("arena-summarizations").document(uuid4().hex)
+    doc_ref.set({
+        "model_a": state_a.model_name,
+        "model_b": state_b.model_name,
+        "model_a_conv": model_a_conv,
+        "model_b_conv": model_b_conv,
+        "winner": winner,
+        "timestamp": firestore.SERVER_TIMESTAMP
+    })
+    return
+  if res_type == ResponseType.TRANSLATE.value:
+    doc_ref = db.collection("arena-translations").document(uuid4().hex)
+    doc_ref.set({
+        "model_a": state_a.model_name,
+        "model_b": state_b.model_name,
+        "model_a_conv": model_a_conv,
+        "model_b_conv": model_b_conv,
+        "source_language": source_lang.lower(),
+        "target_language": target_lang.lower(),
+        "winner": winner,
+        "timestamp": firestore.SERVER_TIMESTAMP
+    })
 def user(user_prompt):
   model_pair = sample(SUPPORTED_MODELS, 2)
 with gr.Blocks() as app:
+  with gr.Row():
+    # One radio choice per ResponseType member, labelled with its value.
+    response_type_radio = gr.Radio(
+        [response_type.value for response_type in ResponseType],
+        label="Response type",
+        info="Choose the type of response you want from the model.")
+    # Both language dropdowns start hidden (visible=False) and have no
+    # preselected value; the change handler registered on
+    # response_type_radio toggles their visibility.
+    source_language = gr.Dropdown(
+        choices=SUPPORTED_TRANSLATION_LANGUAGES,
+        label="Source language",
+        info="Choose the source language for translation.",
+        interactive=True,
+        visible=False)
+    target_language = gr.Dropdown(
+        choices=SUPPORTED_TRANSLATION_LANGUAGES,
+        label="Target language",
+        info="Choose the target language for translation.",
+        interactive=True,
+        visible=False)
+    def update_language_visibility(response_type):
+      if response_type != ResponseType.TRANSLATE.value:
+        return {
+            source_language: gr.Dropdown(visible=False),
+            target_language: gr.Dropdown(visible=False)
+        }
+      return {
+          source_language: gr.Dropdown(visible=True),
+          target_language: gr.Dropdown(visible=True)
+      }
+    response_type_radio.change(update_language_visibility, response_type_radio,
+                               [source_language, target_language])
   model_names = [gr.State(None), gr.State(None)]
   responses = [gr.State(None), gr.State(None)]
     responses[0] = gr.Textbox(label="Model A", interactive=False)
     responses[1] = gr.Textbox(label="Model B", interactive=False)
+  # TODO(#1): Display it only after the user submits the prompt.
+  # TODO(#1): Block voting if the response_type is not set.
+  # TODO(#1): Block voting if the user already voted.
+  with gr.Row():
+    option_a = gr.Button(VoteOptions.MODEL_A.value)
+    option_a.click(
+        vote, states +
+        [option_a, response_type_radio, source_language, target_language])
+    option_b = gr.Button("Model B is better")
+    option_b.click(
+        vote, states +
+        [option_b, response_type_radio, source_language, target_language])
+    tie = gr.Button("Tie")
+    tie.click(
+        vote,
+        states + [tie, response_type_radio, source_language, target_language])
+  # TODO(#1): Hide it until the user votes.
   with gr.Accordion("Show models", open=False):
     with gr.Row():
       model_names[0] = gr.Textbox(label="Model A", interactive=False)

requirments.txt CHANGED Viewed

@@ -6,26 +6,33 @@ altair==5.2.0
 annotated-types==0.6.0
 anyio==4.2.0
 attrs==23.2.0
 cachetools==5.3.2
 certifi==2023.11.17
 charset-normalizer==3.3.2
 click==8.1.7
 colorama==0.4.6
 contourpy==1.2.0
 cycler==0.12.1
 distro==1.9.0
 fastapi==0.109.0
 ffmpy==0.3.1
 filelock==3.13.1
 fonttools==4.47.2
 frozenlist==1.4.1
 fschat==0.2.35
 fsspec==2023.12.2
 google-api-core==2.16.1
 google-auth==2.27.0
 google-cloud-aiplatform==1.40.0
 google-cloud-bigquery==3.17.1
 google-cloud-core==2.4.1
 google-cloud-resource-manager==1.11.0
 google-cloud-storage==2.14.0
 google-crc32c==1.5.0
@@ -38,6 +45,7 @@ grpcio==1.60.0
 grpcio-status==1.60.0
 h11==0.14.0
 httpcore==1.0.2
 httpx==0.26.0
 huggingface-hub==0.20.3
 idna==3.6
@@ -52,6 +60,7 @@ MarkupSafe==2.1.4
 matplotlib==3.8.2
 mdurl==0.1.2
 mpmath==1.3.0
 multidict==6.0.4
 networkx==3.2.1
 nh3==0.2.15
@@ -68,10 +77,12 @@ protobuf==4.25.2
 psutil==5.9.8
 pyasn1==0.5.1
 pyasn1-modules==0.3.0
 pydantic==1.10.14
 pydantic_core==2.16.1
 pydub==0.25.1
 Pygments==2.17.2
 pyparsing==3.1.1
 python-dateutil==2.8.2
 python-multipart==0.0.6
@@ -105,6 +116,7 @@ transformers==4.37.2
 typer==0.9.0
 typing_extensions==4.9.0
 tzdata==2023.4
 urllib3==2.2.0
 uvicorn==0.27.0.post1
 wavedrom==2.0.3.post3

 annotated-types==0.6.0
 anyio==4.2.0
 attrs==23.2.0
+CacheControl==0.13.1
 cachetools==5.3.2
 certifi==2023.11.17
+cffi==1.16.0
 charset-normalizer==3.3.2
 click==8.1.7
 colorama==0.4.6
 contourpy==1.2.0
+cryptography==42.0.2
 cycler==0.12.1
 distro==1.9.0
 fastapi==0.109.0
 ffmpy==0.3.1
 filelock==3.13.1
+firebase-admin==6.4.0
 fonttools==4.47.2
 frozenlist==1.4.1
 fschat==0.2.35
 fsspec==2023.12.2
 google-api-core==2.16.1
+google-api-python-client==2.116.0
 google-auth==2.27.0
+google-auth-httplib2==0.2.0
 google-cloud-aiplatform==1.40.0
 google-cloud-bigquery==3.17.1
 google-cloud-core==2.4.1
+google-cloud-firestore==2.14.0
 google-cloud-resource-manager==1.11.0
 google-cloud-storage==2.14.0
 google-crc32c==1.5.0
 grpcio-status==1.60.0
 h11==0.14.0
 httpcore==1.0.2
+httplib2==0.22.0
 httpx==0.26.0
 huggingface-hub==0.20.3
 idna==3.6
 matplotlib==3.8.2
 mdurl==0.1.2
 mpmath==1.3.0
+msgpack==1.0.7
 multidict==6.0.4
 networkx==3.2.1
 nh3==0.2.15
 psutil==5.9.8
 pyasn1==0.5.1
 pyasn1-modules==0.3.0
+pycparser==2.21
 pydantic==1.10.14
 pydantic_core==2.16.1
 pydub==0.25.1
 Pygments==2.17.2
+PyJWT==2.8.0
 pyparsing==3.1.1
 python-dateutil==2.8.2
 python-multipart==0.0.6
 typer==0.9.0
 typing_extensions==4.9.0
 tzdata==2023.4
+uritemplate==4.1.1
 urllib3==2.2.0
 uvicorn==0.27.0.post1
 wavedrom==2.0.3.post3