Spaces:
Sleeping
Sleeping
Update app
Browse files- Untitled-1.ipynb +0 -34
- app.py +34 -4
Untitled-1.ipynb
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": null,
|
6 |
-
"metadata": {},
|
7 |
-
"outputs": [],
|
8 |
-
"source": [
|
9 |
-
"10 ** 10"
|
10 |
-
]
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"cell_type": "markdown",
|
14 |
-
"metadata": {},
|
15 |
-
"source": [
|
16 |
-
"1+1"
|
17 |
-
]
|
18 |
-
},
|
19 |
-
{
|
20 |
-
"cell_type": "markdown",
|
21 |
-
"metadata": {},
|
22 |
-
"source": [
|
23 |
-
"# Testing dev mode"
|
24 |
-
]
|
25 |
-
}
|
26 |
-
],
|
27 |
-
"metadata": {
|
28 |
-
"language_info": {
|
29 |
-
"name": "python"
|
30 |
-
}
|
31 |
-
},
|
32 |
-
"nbformat": 4,
|
33 |
-
"nbformat_minor": 2
|
34 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,7 +1,37 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
+
# LLM performance data with scores.
# Maps each task name to a list of (model name, score) pairs; recommend_llm
# renders each score followed by a percent sign. Every list is kept in
# descending score order, matching the order in which recommend_llm
# presents the models.
performance_data = {
    "Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
    "Graduate level reasoning": [("Claude 3 Opus", 50.4), ("Claude 3 Sonnet", 40.4), ("GPT-4", 35.7)],
    "Grade school math": [("Claude 3 Opus", 95.0), ("Gemini 1.0 Ultra", 94.4), ("GPT-4", 92.0)],
    "Math problem-solving": [("Claude 3 Opus", 60.1), ("Gemini 1.0 Ultra", 53.2), ("GPT-4", 52.9)],
    "Multilingual math": [("Claude 3 Opus", 90.7), ("Claude 3 Sonnet", 83.5), ("Gemini 1.0 Ultra", 79.0)],
    # Previously listed Gemini 1.0 Ultra (74.4) ahead of Claude 3 Haiku (75.9);
    # reordered so this row follows the same descending order as the others.
    "Code": [("Claude 3 Opus", 84.9), ("Claude 3 Haiku", 75.9), ("Gemini 1.0 Ultra", 74.4)],
    "Reasoning over text": [("Claude 3 Opus", 83.1), ("Gemini 1.0 Ultra", 82.4), ("GPT-4", 80.9)],
    "Mixed evaluations": [("Claude 3 Opus", 86.8), ("Gemini 1.0 Ultra", 83.6), ("GPT-4", 83.1)],
    "Knowledge Q&A": [("Claude 3 Opus", 96.4), ("GPT-4", 96.3), ("Claude 3 Sonnet", 93.2)],
    "Common Knowledge": [("Claude 3 Opus", 95.4), ("GPT-4", 95.3), ("Gemini 1.0 Ultra", 87.8)],
}
|
16 |
|
17 |
+
def recommend_llm(task):
    """Return a ranked, human-readable list of LLMs for *task*.

    Looks up *task* in ``performance_data`` and lists its models from the
    highest score to the lowest, one numbered line per model. Returns
    "No data available" when the task is not a key of ``performance_data``
    or its entry list is empty.
    """
    entries = performance_data.get(task, [])
    if not entries:
        return "No data available"

    # Sort defensively so the numbering reflects score order regardless of
    # how the entries are listed in the table.
    ranked = sorted(entries, key=lambda entry: entry[1], reverse=True)
    header = f"For {task}, the recommended LLMs are:\n"
    lines = [
        f"{rank}. {model} with a score of {score}%\n"
        for rank, (model, score) in enumerate(ranked, start=1)
    ]
    return header + "".join(lines)
|
26 |
+
|
27 |
+
# Gradio interface: a dropdown listing the keys of performance_data feeds
# recommend_llm, and the returned text is shown in a textbox.
interface = gr.Interface(
    title="LLM Recommendation App",
    description="Select a task to get recommendations for the best LLMs based on performance data.",
    fn=recommend_llm,
    inputs=gr.Dropdown(list(performance_data), label="Select Task"),
    outputs=gr.Textbox(label="LLM Recommendations"),
)

# Launch the app
interface.launch()
|