Spaces:
Running
Running
suhyun.kang
commited on
Commit
•
cf196b3
1
Parent(s):
5e33531
[#1] Add voting feature
Browse filesChanges:
- Added response type selection.
- Included source and target language options for the "Translate" selection.
- Implemented the voting feature.
Firestore example item: https://console.firebase.google.com/u/0/project/special-tf-prod/firestore/data/~2Farena-summarizations~2F28213a8a0c1c44c295745841dabc7ad4?hl=ko
Screenshot: https://screen.yanolja.in/tCD6mJ0CpqoGDZwr.png
- .gitignore +1 -0
- app.py +109 -0
- requirments.txt +12 -0
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
venv
|
|
|
|
1 |
venv
|
2 |
+
*.log
|
app.py
CHANGED
@@ -2,15 +2,70 @@
|
|
2 |
It provides a platform for comparing the responses of two LLMs.
|
3 |
"""
|
4 |
|
|
|
|
|
5 |
from random import sample
|
|
|
6 |
|
7 |
from fastchat.serve import gradio_web_server
|
8 |
from fastchat.serve.gradio_web_server import bot_response
|
|
|
|
|
9 |
import gradio as gr
|
10 |
|
|
|
|
|
|
|
11 |
# TODO(#1): Add more models.
|
12 |
SUPPORTED_MODELS = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "gemini-pro"]
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
def user(user_prompt):
|
16 |
model_pair = sample(SUPPORTED_MODELS, 2)
|
@@ -85,6 +140,40 @@ def bot(state_a, state_b, request: gr.Request):
|
|
85 |
|
86 |
|
87 |
with gr.Blocks() as app:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
model_names = [gr.State(None), gr.State(None)]
|
89 |
responses = [gr.State(None), gr.State(None)]
|
90 |
|
@@ -98,6 +187,26 @@ with gr.Blocks() as app:
|
|
98 |
responses[0] = gr.Textbox(label="Model A", interactive=False)
|
99 |
responses[1] = gr.Textbox(label="Model B", interactive=False)
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
with gr.Accordion("Show models", open=False):
|
102 |
with gr.Row():
|
103 |
model_names[0] = gr.Textbox(label="Model A", interactive=False)
|
|
|
2 |
It provides a platform for comparing the responses of two LLMs.
|
3 |
"""
|
4 |
|
5 |
+
import enum
|
6 |
+
import json
|
7 |
from random import sample
|
8 |
+
from uuid import uuid4
|
9 |
|
10 |
from fastchat.serve import gradio_web_server
|
11 |
from fastchat.serve.gradio_web_server import bot_response
|
12 |
+
import firebase_admin
|
13 |
+
from firebase_admin import firestore
|
14 |
import gradio as gr
|
15 |
|
16 |
+
db_app = firebase_admin.initialize_app()
|
17 |
+
db = firestore.client()
|
18 |
+
|
19 |
# TODO(#1): Add more models.
|
20 |
SUPPORTED_MODELS = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "gemini-pro"]
|
21 |
|
22 |
+
SUPPORTED_TRANSLATION_LANGUAGES = ["Korean", "English"]
|
23 |
+
|
24 |
+
|
25 |
+
class ResponseType(enum.Enum):
|
26 |
+
SUMMARIZE = "Summarize"
|
27 |
+
TRANSLATE = "Translate"
|
28 |
+
|
29 |
+
|
30 |
+
class VoteOptions(enum.Enum):
|
31 |
+
MODEL_A = "Model A is better"
|
32 |
+
MODEL_B = "Model B is better"
|
33 |
+
TIE = "Tie"
|
34 |
+
|
35 |
+
|
36 |
+
def vote(state_a, state_b, vote_button, res_type, source_lang, target_lang):
|
37 |
+
winner = VoteOptions(vote_button).name.lower()
|
38 |
+
|
39 |
+
# The 'messages' field in the state is an array of arrays, a data type
|
40 |
+
# not supported by Firestore. Therefore, we convert it to a JSON string.
|
41 |
+
model_a_conv = json.dumps(state_a.dict())
|
42 |
+
model_b_conv = json.dumps(state_b.dict())
|
43 |
+
|
44 |
+
if res_type == ResponseType.SUMMARIZE.value:
|
45 |
+
doc_ref = db.collection("arena-summarizations").document(uuid4().hex)
|
46 |
+
doc_ref.set({
|
47 |
+
"model_a": state_a.model_name,
|
48 |
+
"model_b": state_b.model_name,
|
49 |
+
"model_a_conv": model_a_conv,
|
50 |
+
"model_b_conv": model_b_conv,
|
51 |
+
"winner": winner,
|
52 |
+
"timestamp": firestore.SERVER_TIMESTAMP
|
53 |
+
})
|
54 |
+
return
|
55 |
+
|
56 |
+
if res_type == ResponseType.TRANSLATE.value:
|
57 |
+
doc_ref = db.collection("arena-translations").document(uuid4().hex)
|
58 |
+
doc_ref.set({
|
59 |
+
"model_a": state_a.model_name,
|
60 |
+
"model_b": state_b.model_name,
|
61 |
+
"model_a_conv": model_a_conv,
|
62 |
+
"model_b_conv": model_b_conv,
|
63 |
+
"source_language": source_lang.lower(),
|
64 |
+
"target_language": target_lang.lower(),
|
65 |
+
"winner": winner,
|
66 |
+
"timestamp": firestore.SERVER_TIMESTAMP
|
67 |
+
})
|
68 |
+
|
69 |
|
70 |
def user(user_prompt):
|
71 |
model_pair = sample(SUPPORTED_MODELS, 2)
|
|
|
140 |
|
141 |
|
142 |
with gr.Blocks() as app:
|
143 |
+
with gr.Row():
|
144 |
+
response_type_radio = gr.Radio(
|
145 |
+
[response_type.value for response_type in ResponseType],
|
146 |
+
label="Response type",
|
147 |
+
info="Choose the type of response you want from the model.")
|
148 |
+
|
149 |
+
source_language = gr.Dropdown(
|
150 |
+
choices=SUPPORTED_TRANSLATION_LANGUAGES,
|
151 |
+
label="Source language",
|
152 |
+
info="Choose the source language for translation.",
|
153 |
+
interactive=True,
|
154 |
+
visible=False)
|
155 |
+
target_language = gr.Dropdown(
|
156 |
+
choices=SUPPORTED_TRANSLATION_LANGUAGES,
|
157 |
+
label="Target language",
|
158 |
+
info="Choose the target language for translation.",
|
159 |
+
interactive=True,
|
160 |
+
visible=False)
|
161 |
+
|
162 |
+
def update_language_visibility(response_type):
|
163 |
+
if response_type != ResponseType.TRANSLATE.value:
|
164 |
+
return {
|
165 |
+
source_language: gr.Dropdown(visible=False),
|
166 |
+
target_language: gr.Dropdown(visible=False)
|
167 |
+
}
|
168 |
+
|
169 |
+
return {
|
170 |
+
source_language: gr.Dropdown(visible=True),
|
171 |
+
target_language: gr.Dropdown(visible=True)
|
172 |
+
}
|
173 |
+
|
174 |
+
response_type_radio.change(update_language_visibility, response_type_radio,
|
175 |
+
[source_language, target_language])
|
176 |
+
|
177 |
model_names = [gr.State(None), gr.State(None)]
|
178 |
responses = [gr.State(None), gr.State(None)]
|
179 |
|
|
|
187 |
responses[0] = gr.Textbox(label="Model A", interactive=False)
|
188 |
responses[1] = gr.Textbox(label="Model B", interactive=False)
|
189 |
|
190 |
+
# TODO(#1): Display it only after the user submits the prompt.
|
191 |
+
# TODO(#1): Block voting if the response_type is not set.
|
192 |
+
# TODO(#1): Block voting if the user already voted.
|
193 |
+
with gr.Row():
|
194 |
+
option_a = gr.Button(VoteOptions.MODEL_A.value)
|
195 |
+
option_a.click(
|
196 |
+
vote, states +
|
197 |
+
[option_a, response_type_radio, source_language, target_language])
|
198 |
+
|
199 |
+
option_b = gr.Button("Model B is better")
|
200 |
+
option_b.click(
|
201 |
+
vote, states +
|
202 |
+
[option_b, response_type_radio, source_language, target_language])
|
203 |
+
|
204 |
+
tie = gr.Button("Tie")
|
205 |
+
tie.click(
|
206 |
+
vote,
|
207 |
+
states + [tie, response_type_radio, source_language, target_language])
|
208 |
+
|
209 |
+
# TODO(#1): Hide it until the user votes.
|
210 |
with gr.Accordion("Show models", open=False):
|
211 |
with gr.Row():
|
212 |
model_names[0] = gr.Textbox(label="Model A", interactive=False)
|
requirments.txt
CHANGED
@@ -6,26 +6,33 @@ altair==5.2.0
|
|
6 |
annotated-types==0.6.0
|
7 |
anyio==4.2.0
|
8 |
attrs==23.2.0
|
|
|
9 |
cachetools==5.3.2
|
10 |
certifi==2023.11.17
|
|
|
11 |
charset-normalizer==3.3.2
|
12 |
click==8.1.7
|
13 |
colorama==0.4.6
|
14 |
contourpy==1.2.0
|
|
|
15 |
cycler==0.12.1
|
16 |
distro==1.9.0
|
17 |
fastapi==0.109.0
|
18 |
ffmpy==0.3.1
|
19 |
filelock==3.13.1
|
|
|
20 |
fonttools==4.47.2
|
21 |
frozenlist==1.4.1
|
22 |
fschat==0.2.35
|
23 |
fsspec==2023.12.2
|
24 |
google-api-core==2.16.1
|
|
|
25 |
google-auth==2.27.0
|
|
|
26 |
google-cloud-aiplatform==1.40.0
|
27 |
google-cloud-bigquery==3.17.1
|
28 |
google-cloud-core==2.4.1
|
|
|
29 |
google-cloud-resource-manager==1.11.0
|
30 |
google-cloud-storage==2.14.0
|
31 |
google-crc32c==1.5.0
|
@@ -38,6 +45,7 @@ grpcio==1.60.0
|
|
38 |
grpcio-status==1.60.0
|
39 |
h11==0.14.0
|
40 |
httpcore==1.0.2
|
|
|
41 |
httpx==0.26.0
|
42 |
huggingface-hub==0.20.3
|
43 |
idna==3.6
|
@@ -52,6 +60,7 @@ MarkupSafe==2.1.4
|
|
52 |
matplotlib==3.8.2
|
53 |
mdurl==0.1.2
|
54 |
mpmath==1.3.0
|
|
|
55 |
multidict==6.0.4
|
56 |
networkx==3.2.1
|
57 |
nh3==0.2.15
|
@@ -68,10 +77,12 @@ protobuf==4.25.2
|
|
68 |
psutil==5.9.8
|
69 |
pyasn1==0.5.1
|
70 |
pyasn1-modules==0.3.0
|
|
|
71 |
pydantic==1.10.14
|
72 |
pydantic_core==2.16.1
|
73 |
pydub==0.25.1
|
74 |
Pygments==2.17.2
|
|
|
75 |
pyparsing==3.1.1
|
76 |
python-dateutil==2.8.2
|
77 |
python-multipart==0.0.6
|
@@ -105,6 +116,7 @@ transformers==4.37.2
|
|
105 |
typer==0.9.0
|
106 |
typing_extensions==4.9.0
|
107 |
tzdata==2023.4
|
|
|
108 |
urllib3==2.2.0
|
109 |
uvicorn==0.27.0.post1
|
110 |
wavedrom==2.0.3.post3
|
|
|
6 |
annotated-types==0.6.0
|
7 |
anyio==4.2.0
|
8 |
attrs==23.2.0
|
9 |
+
CacheControl==0.13.1
|
10 |
cachetools==5.3.2
|
11 |
certifi==2023.11.17
|
12 |
+
cffi==1.16.0
|
13 |
charset-normalizer==3.3.2
|
14 |
click==8.1.7
|
15 |
colorama==0.4.6
|
16 |
contourpy==1.2.0
|
17 |
+
cryptography==42.0.2
|
18 |
cycler==0.12.1
|
19 |
distro==1.9.0
|
20 |
fastapi==0.109.0
|
21 |
ffmpy==0.3.1
|
22 |
filelock==3.13.1
|
23 |
+
firebase-admin==6.4.0
|
24 |
fonttools==4.47.2
|
25 |
frozenlist==1.4.1
|
26 |
fschat==0.2.35
|
27 |
fsspec==2023.12.2
|
28 |
google-api-core==2.16.1
|
29 |
+
google-api-python-client==2.116.0
|
30 |
google-auth==2.27.0
|
31 |
+
google-auth-httplib2==0.2.0
|
32 |
google-cloud-aiplatform==1.40.0
|
33 |
google-cloud-bigquery==3.17.1
|
34 |
google-cloud-core==2.4.1
|
35 |
+
google-cloud-firestore==2.14.0
|
36 |
google-cloud-resource-manager==1.11.0
|
37 |
google-cloud-storage==2.14.0
|
38 |
google-crc32c==1.5.0
|
|
|
45 |
grpcio-status==1.60.0
|
46 |
h11==0.14.0
|
47 |
httpcore==1.0.2
|
48 |
+
httplib2==0.22.0
|
49 |
httpx==0.26.0
|
50 |
huggingface-hub==0.20.3
|
51 |
idna==3.6
|
|
|
60 |
matplotlib==3.8.2
|
61 |
mdurl==0.1.2
|
62 |
mpmath==1.3.0
|
63 |
+
msgpack==1.0.7
|
64 |
multidict==6.0.4
|
65 |
networkx==3.2.1
|
66 |
nh3==0.2.15
|
|
|
77 |
psutil==5.9.8
|
78 |
pyasn1==0.5.1
|
79 |
pyasn1-modules==0.3.0
|
80 |
+
pycparser==2.21
|
81 |
pydantic==1.10.14
|
82 |
pydantic_core==2.16.1
|
83 |
pydub==0.25.1
|
84 |
Pygments==2.17.2
|
85 |
+
PyJWT==2.8.0
|
86 |
pyparsing==3.1.1
|
87 |
python-dateutil==2.8.2
|
88 |
python-multipart==0.0.6
|
|
|
116 |
typer==0.9.0
|
117 |
typing_extensions==4.9.0
|
118 |
tzdata==2023.4
|
119 |
+
uritemplate==4.1.1
|
120 |
urllib3==2.2.0
|
121 |
uvicorn==0.27.0.post1
|
122 |
wavedrom==2.0.3.post3
|