Spaces: openreviewer
Commit 9d2f40b • Parent(s): 38a86d9
Upload folder using huggingface_hub
app.py
CHANGED
@@ -22,11 +22,11 @@ prompt_dir = 'iclr2024'
 api_keys = {
     'openai_api_key': os.environ.get('openai_api_key'),
     'claude_api_key': os.environ.get('anthropic_api_key'),
-    'gemini_api_key': os.environ.get('
+    'gemini_api_key': os.environ.get('gemini_api_key'),
     'commandr_api_key': os.environ.get('cohere_api_key')
 }
 
-use_real_api =
+use_real_api = True
 
 # Function to generate a paper_id using SHA-512 hash
 def generate_paper_id(paper_content):
@@ -204,50 +204,49 @@ def setup_interface():
 
         with gr.TabItem("Leaderboard"):
            gr.Markdown("## Leaderboard")
-
-           # Fetch the leaderboard data from the database
-           leaderboard_data = get_leaderboard()
-           # print(leaderboard_data)
-
-           # Create the leaderboard HTML dynamically
-           leaderboard_html = """
-           <table style="width:100%; border: 1px solid #444; border-collapse: collapse; font-family: Arial, sans-serif; background-color: #2b2b2b;">
-           <thead>
-           <tr style="border: 1px solid #444; padding: 12px; background-color: #1a1a1a;">
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Rank</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Model</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Arena Elo</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">95% CI</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Votes</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Organization</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">License</th>
-           <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Knowledge Cutoff</th>
-           </tr>
-           </thead>
-           <tbody>
-           """
-
-           for rank, model in enumerate(leaderboard_data, start=1):
-               leaderboard_html += f"""
-               <tr style="border: 1px solid #444; padding: 12px;">
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{rank}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['ModelID']}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['EloScore']}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['CI_Lower']} - {model['CI_Upper']}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['Votes']}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['Organization']}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['License']}</td>
-               <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['KnowledgeCutoff']}</td>
-               </tr>
-               """
-
-           leaderboard_html += """
-           </tbody>
-           </table>
-           """
-
-           gr.HTML(leaderboard_html)
+
+           def refresh_leaderboard():
+               leaderboard_data = get_leaderboard()
+               leaderboard_html = """
+               <table style="width:100%; border: 1px solid #444; border-collapse: collapse; font-family: Arial, sans-serif; background-color: #2b2b2b;">
+               <thead>
+               <tr style="border: 1px solid #444; padding: 12px; background-color: #1a1a1a;">
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Rank</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Model</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Arena Elo</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">95% CI</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Votes</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Organization</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">License</th>
+               <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Knowledge Cutoff</th>
+               </tr>
+               </thead>
+               <tbody>
+               """
+
+               for rank, model in enumerate(leaderboard_data, start=1):
+                   leaderboard_html += f"""
+                   <tr style="border: 1px solid #444; padding: 12px;">
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{rank}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['ModelID']}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['EloScore']}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['CI_Lower']} - {model['CI_Upper']}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['Votes']}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['Organization']}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['License']}</td>
+                   <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['KnowledgeCutoff']}</td>
+                   </tr>
+                   """
+
+               leaderboard_html += """
+               </tbody>
+               </table>
+               """
+               return gr.update(value=leaderboard_html)
+
+           new_html = get_leaderboard()
+           leaderboard_html = gr.HTML(new_html)
+           refresh_button = gr.Button("Refresh Leaderboard")
+           refresh_button.click(fn=refresh_leaderboard, inputs=[], outputs=[leaderboard_html])
 
        logging.debug("Gradio interface setup complete.")
        return demo
@@ -257,3 +256,4 @@ if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo = setup_interface()
    demo.launch()
+
logging_config.py
CHANGED
@@ -3,7 +3,7 @@ import logging
 def setup_logging():
     logging.basicConfig(
         filename="arena.log",
-        level=logging.
+        level=logging.INFO,  # Change to INFO level
         format='%(asctime)s - %(levelname)s - %(message)s'
     )
     logging.info("Logging setup complete.")
models.py
CHANGED
@@ -22,7 +22,7 @@ class PaperProcessor:
     def __init__(self, prompt_dir, model, openai_api_key, claude_api_key, gemini_api_key, commandr_api_key):
         self.prompt_dir = prompt_dir
         self.model = model
-        self.openai_api_key = openai_api_key
+        self.openai_api_key = openai_api_key
         self.claude_api_key = claude_api_key
         self.gemini_api_key = gemini_api_key
         self.commandr_api_key = commandr_api_key
@@ -55,7 +55,7 @@ class PaperProcessor:
         logging.info(f"Sending the following prompt to {model_type}: {prompt}")
 
         try:
-            if model_type == 'gpt':
+            if model_type == 'gpt-4-turbo-2024-04-09':
                 client = OpenAI(api_key=self.openai_api_key)
                 messages = [{"role": "system", "content": system_role}, {"role": "user", "content": prompt}]
                 completion = client.chat.completions.create(
@@ -63,9 +63,21 @@ class PaperProcessor:
                     messages=messages,
                     temperature=1
                 )
+                print(completion)
+                return completion.choices[0].message.content.strip()
+
+            elif model_type == 'gpt-4o':
+                client = OpenAI(api_key=self.openai_api_key)
+                messages = [{"role": "system", "content": system_role}, {"role": "user", "content": prompt}]
+                completion = client.chat.completions.create(
+                    model="gpt-4o",
+                    messages=messages,
+                    temperature=1
+                )
+                print(completion)
                 return completion.choices[0].message.content.strip()
 
-            elif model_type == 'claude':
+            elif model_type == 'claude-3-opus-20240229':
                 client = anthropic.Anthropic(api_key=self.claude_api_key)
                 response = client.messages.create(
                     model='claude-3-opus-20240229',
@@ -74,25 +86,29 @@ class PaperProcessor:
                     temperature=0.5,
                     messages=[{"role": "user", "content": prompt}]
                 )
+                print(response)
                 return response.content[0].text
 
-            elif model_type == '
+            elif model_type == 'command-r-plus':
                 co = cohere.Client(self.commandr_api_key)
                 response = co.chat(
                     model="command-r-plus",
                     message=prompt,
                     preamble=system_role
                 )
+                print(response)
                 return response.text
 
-            elif model_type == 'gemini':
+            elif model_type == 'gemini-pro':
                 genai.configure(api_key=self.gemini_api_key)
                 model = genai.GenerativeModel('gemini-pro')
                 response = model.generate_content(prompt)
+                print(response)
                 return response.candidates[0].content.parts[0].text
 
         except Exception as e:
             logging.error(f"Exception occurred: {e}")
+            print(e)
             return None
 
     def is_content_appropriate(self, content):
utils.py
CHANGED
@@ -3,7 +3,8 @@ import os
 import logging
 import random
 from models import Paper, PaperProcessor
-
+import concurrent.futures
+from concurrent.futures import ThreadPoolExecutor
 
 def extract_text_from_pdf(filename):
     with fitz.open(filename) as pdf_document:
@@ -33,13 +34,24 @@ def process_paper(pdf_file, paper_dir, prompt_dir, api_keys):
     paper = Paper(pdf_file.name if hasattr(pdf_file, 'name')
                   else os.path.basename(pdf_path), extracted_text)
 
-    models = ['gpt', 'claude', 'gemini', '
+    models = ['gpt-4-turbo-2024-04-09', 'gpt-4o', 'claude-3-opus-20240229', 'gemini-pro', 'command-r-plus']
     selected_models = random.sample(models, 2)
 
     reviews = []
-    for model in selected_models:
+
+    def process_with_model(model):
         processor = PaperProcessor(prompt_dir, model, **api_keys)
-        review_text = processor.process_paper(paper)
-        reviews.append(review_text)
+        return processor.process_paper(paper)
+
+    with ThreadPoolExecutor() as executor:
+        future_to_model = {executor.submit(process_with_model, model): model for model in selected_models}
+        for future in concurrent.futures.as_completed(future_to_model):
+            model = future_to_model[future]
+            try:
+                review_text = future.result()
+                reviews.append(review_text)
+            except Exception as exc:
+                logging.error(f"Model {model} generated an exception: {exc}")
+
     logging.debug(f"Reviews generated: {reviews}")
     return reviews, selected_models