Spaces:
Sleeping
Sleeping
Commit
•
9914aa0
1
Parent(s):
5b09dd5
Refactor app.py for improved readability and maintainability
Browse files
app.py
CHANGED
@@ -2,19 +2,29 @@ import gradio as gr
|
|
2 |
import markdown
|
3 |
import requests
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def create_chat_html(messages, dataset_id, offset, compare_mode=False, column=""):
|
7 |
chat_html = ""
|
8 |
-
turn_number = 1
|
9 |
-
for i in range(0, len(messages), 2):
|
10 |
user_message = messages[i]
|
11 |
system_message = messages[i + 1] if i + 1 < len(messages) else None
|
12 |
user_role = user_message["role"]
|
13 |
user_content = user_message["content"]
|
14 |
user_content_html = markdown.markdown(user_content)
|
15 |
user_content_length = len(user_content)
|
16 |
-
user_html =
|
17 |
-
|
|
|
|
|
18 |
user_html += (
|
19 |
f"<strong>Turn {turn_number} - {user_role.capitalize()}:</strong><br>"
|
20 |
)
|
@@ -27,8 +37,10 @@ def create_chat_html(messages, dataset_id, offset, compare_mode=False, column=""
|
|
27 |
system_content = system_message["content"]
|
28 |
system_content_html = markdown.markdown(system_content)
|
29 |
system_content_length = len(system_content)
|
30 |
-
system_html =
|
31 |
-
|
|
|
|
|
32 |
system_html += f"<strong>{system_role.capitalize()}:</strong><br>"
|
33 |
system_html += (
|
34 |
f"<em>Length: {system_content_length} characters</em><br><br>"
|
@@ -36,8 +48,6 @@ def create_chat_html(messages, dataset_id, offset, compare_mode=False, column=""
|
|
36 |
system_html += f"{system_content_html}"
|
37 |
system_html += "</div></div>"
|
38 |
chat_html += system_html
|
39 |
-
turn_number += 1
|
40 |
-
|
41 |
if compare_mode:
|
42 |
chat_html = f'<div class="column {column}">{chat_html}</div>'
|
43 |
|
@@ -138,10 +148,7 @@ def fetch_data(
|
|
138 |
|
139 |
|
140 |
def update_column_names(compare_mode):
|
141 |
-
if compare_mode:
|
142 |
-
return "chosen", "rejected"
|
143 |
-
else:
|
144 |
-
return "", ""
|
145 |
|
146 |
|
147 |
with gr.Blocks() as demo:
|
@@ -156,8 +163,10 @@ with gr.Blocks() as demo:
|
|
156 |
"This app allows you to view chat data from a Hugging Face dataset via the datasets viewer API. ChatML formatted data consists of messages formatted as lists of dictionaries, where each dictionary represents a message with a 'role' (e.g., 'user' or 'assistant') and 'content'. This is a very basic demo built in less than 30 minutes but it hopefully gives you an idea of the kinds of things you can build with the datasets viewer. You can get started building your own apps by going to the datasets viewer documentation [here](https://huggingface.co/docs/datasets-server/index)."
|
157 |
)
|
158 |
with gr.Row():
|
159 |
-
dataset_id = gr.
|
160 |
-
|
|
|
|
|
161 |
)
|
162 |
chosen_column = gr.Textbox(
|
163 |
label="Chosen Column",
|
@@ -211,4 +220,4 @@ with gr.Blocks() as demo:
|
|
211 |
outputs=[dataset_link, output_html, current_offset],
|
212 |
)
|
213 |
|
214 |
-
demo.launch(debug=True)
|
|
|
2 |
import markdown
|
3 |
import requests
|
4 |
|
5 |
+
example_dpo_datasets = [
|
6 |
+
"mlabonne/orpo-dpo-mix-40k",
|
7 |
+
"argilla/ultrafeedback-binarized-preferences-cleaned",
|
8 |
+
"argilla/Capybara-Preferences",
|
9 |
+
]
|
10 |
+
general_examples = ["davanstrien/cosmochat", "HuggingFaceH4/no_robots"]
|
11 |
+
|
12 |
+
datasets_examples = example_dpo_datasets + general_examples
|
13 |
+
|
14 |
|
15 |
def create_chat_html(messages, dataset_id, offset, compare_mode=False, column=""):
|
16 |
chat_html = ""
|
17 |
+
for turn_number, i in enumerate(range(0, len(messages), 2), start=1):
|
|
|
18 |
user_message = messages[i]
|
19 |
system_message = messages[i + 1] if i + 1 < len(messages) else None
|
20 |
user_role = user_message["role"]
|
21 |
user_content = user_message["content"]
|
22 |
user_content_html = markdown.markdown(user_content)
|
23 |
user_content_length = len(user_content)
|
24 |
+
user_html = (
|
25 |
+
'<div class="user-message" style="justify-content: right;">'
|
26 |
+
+ '<div class="message-content">'
|
27 |
+
)
|
28 |
user_html += (
|
29 |
f"<strong>Turn {turn_number} - {user_role.capitalize()}:</strong><br>"
|
30 |
)
|
|
|
37 |
system_content = system_message["content"]
|
38 |
system_content_html = markdown.markdown(system_content)
|
39 |
system_content_length = len(system_content)
|
40 |
+
system_html = (
|
41 |
+
'<div class="system-message" style="justify-content: left;">'
|
42 |
+
+ '<div class="message-content">'
|
43 |
+
)
|
44 |
system_html += f"<strong>{system_role.capitalize()}:</strong><br>"
|
45 |
system_html += (
|
46 |
f"<em>Length: {system_content_length} characters</em><br><br>"
|
|
|
48 |
system_html += f"{system_content_html}"
|
49 |
system_html += "</div></div>"
|
50 |
chat_html += system_html
|
|
|
|
|
51 |
if compare_mode:
|
52 |
chat_html = f'<div class="column {column}">{chat_html}</div>'
|
53 |
|
|
|
148 |
|
149 |
|
150 |
def update_column_names(compare_mode):
|
151 |
+
return ("chosen", "rejected") if compare_mode else ("", "")
|
|
|
|
|
|
|
152 |
|
153 |
|
154 |
with gr.Blocks() as demo:
|
|
|
163 |
"This app allows you to view chat data from a Hugging Face dataset via the datasets viewer API. ChatML formatted data consists of messages formatted as lists of dictionaries, where each dictionary represents a message with a 'role' (e.g., 'user' or 'assistant') and 'content'. This is a very basic demo built in less than 30 minutes but it hopefully gives you an idea of the kinds of things you can build with the datasets viewer. You can get started building your own apps by going to the datasets viewer documentation [here](https://huggingface.co/docs/datasets-server/index)."
|
164 |
)
|
165 |
with gr.Row():
|
166 |
+
dataset_id = gr.Dropdown(
|
167 |
+
datasets_examples,
|
168 |
+
label="Dataset ID",
|
169 |
+
allow_custom_value=True,
|
170 |
)
|
171 |
chosen_column = gr.Textbox(
|
172 |
label="Chosen Column",
|
|
|
220 |
outputs=[dataset_link, output_html, current_offset],
|
221 |
)
|
222 |
|
223 |
+
demo.launch(debug=True)
|