Spaces:
Sleeping
Sleeping
first update
Browse files
app.py
CHANGED
@@ -3,14 +3,22 @@ from datasets import load_dataset, Dataset
|
|
3 |
from collections import defaultdict
|
4 |
import random
|
5 |
import requests
|
6 |
-
import os
|
|
|
|
|
|
|
7 |
# Load the source dataset
|
8 |
source_dataset = load_dataset("vietdata/eng_echo", split="train")
|
9 |
-
|
|
|
10 |
|
11 |
# Initialize variables
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
|
15 |
def authenticate(user_id):
|
16 |
|
@@ -25,73 +33,120 @@ def authenticate(user_id):
|
|
25 |
|
26 |
return response.status_code == 200
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
# Helper function to get the next text for translation
|
29 |
-
def
|
30 |
-
|
31 |
-
# eligible_texts = [text for text in source_texts if len(translations[text]) < 10]
|
32 |
-
# if not eligible_texts:
|
33 |
-
# return "All texts are fully translated."
|
34 |
-
|
35 |
-
# Select a random eligible text for translation
|
36 |
-
next_text = random.choice(source_texts)
|
37 |
return next_text
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Function to handle translation submission
|
40 |
-
def submit_translation(user_id,
|
41 |
-
|
42 |
-
if
|
43 |
-
|
|
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
48 |
|
49 |
-
|
50 |
-
else:
|
51 |
-
return "This text already has 10 translations. Please request a new text."
|
52 |
|
53 |
-
# Function to save completed translations to 'translated_echo'
|
54 |
-
def save_to_translated_echo():
|
55 |
-
global translations, processed_data
|
56 |
|
57 |
-
# Gather translations with exactly 10 versions
|
58 |
-
completed_translations = [
|
59 |
-
{"query": text, "translations": [t[1] for t in translations[text]]}
|
60 |
-
for text in translations if len(translations[text]) == 10
|
61 |
-
]
|
62 |
|
63 |
-
|
64 |
-
|
|
|
65 |
|
66 |
-
|
67 |
-
|
|
|
68 |
|
69 |
-
|
70 |
-
|
|
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
# Append to Hugging Face dataset (dummy function call)
|
|
|
73 |
translated_dataset.push_to_hub("vietdata/translated_echo", split="train")
|
74 |
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
# Simulated user data for demonstration
|
78 |
-
user_data = {"hello": "hello"}
|
79 |
|
80 |
# Sample English text to translate
|
81 |
-
english_text =
|
82 |
|
83 |
# User session dictionary to store logged-in status
|
84 |
user_sessions = {}
|
85 |
|
86 |
-
def login(username, state):
|
87 |
state[0] = username
|
88 |
-
|
|
|
89 |
# Authenticate user
|
90 |
if authenticate(username):
|
91 |
#user_sessions[username] = True
|
92 |
-
return f"Welcome, {username}!", gr.update(visible=False), gr.update(visible=True),
|
93 |
else:
|
94 |
-
return "Invalid username or password.", gr.update(visible=True), gr.update(visible=False), ""
|
95 |
|
96 |
def logout(username):
|
97 |
# Log out user and reset session
|
@@ -99,19 +154,28 @@ def logout(username):
|
|
99 |
del user_sessions[username]
|
100 |
return "Logged out. Please log in again.", gr.update(visible=True), gr.update(visible=False)
|
101 |
|
102 |
-
def
|
103 |
try:
|
104 |
-
submit_translation(state[0],
|
105 |
-
origin = job_input
|
106 |
# Save the translation and provide feedback
|
107 |
-
|
|
|
108 |
except Exception as e:
|
|
|
|
|
109 |
print(e)
|
110 |
-
return "Error please try submit again!",
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
# Define the Gradio interface
|
113 |
with gr.Blocks() as demo:
|
114 |
state = gr.State([None])
|
|
|
115 |
# Login section
|
116 |
with gr.Column(visible=True) as login_section:
|
117 |
username_input = gr.Textbox(placeholder="Enter your token", label="Token ID")
|
@@ -120,18 +184,31 @@ with gr.Blocks() as demo:
|
|
120 |
|
121 |
# Translation section (initially hidden)
|
122 |
with gr.Column(visible=False) as translation_section:
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
translation_output = gr.Textbox(label="Submission Status", interactive=False)
|
127 |
logout_button = gr.Button("Logout")
|
128 |
|
129 |
# Button functions
|
130 |
login_button.click(
|
131 |
-
login, inputs=[username_input, state], outputs=[login_output, login_section, translation_section,
|
132 |
)
|
133 |
submit_button.click(
|
134 |
-
|
135 |
)
|
136 |
logout_button.click(
|
137 |
logout, inputs=[username_input], outputs=[login_output, login_section, translation_section]
|
|
|
3 |
from collections import defaultdict
|
4 |
import random
|
5 |
import requests
|
6 |
+
import os
|
7 |
+
from langdetect import detect
|
8 |
+
import pandas as pd
|
9 |
+
|
10 |
# Load the source dataset and collect every distinct English string that
# appears in its "query", "positive" or "negative" columns.
source_dataset = load_dataset("vietdata/eng_echo", split="train")
eng_texts = list(
    set().union(
        source_dataset["query"],
        source_dataset["positive"],
        source_dataset["negative"],
    )
)
vi_texts = []

# Shared in-memory state.
envi_translations = []
vien_translations = []

trans2score = {}

# Each package is a list [user_id, source, target, score, counter, counter].
# The two trailing counters are read through indexes -2 and -1 by the
# package-handling functions. Entry 0 is a placeholder (user_id 0, "None"
# texts) whose counters are infinite.
packages = [[0, "None", "None", 0, float("inf"), float("inf")]]

# Batch size: how many packages are written out per save.
num = 1000
|
22 |
|
23 |
def authenticate(user_id):
|
24 |
|
|
|
33 |
|
34 |
return response.status_code == 200
|
35 |
|
36 |
+
def send_score(user_id, score):
    """Report a grade for ``user_id`` to the grading API, retrying on failure.

    Up to 10 POST attempts are made. An attempt counts as failed when the
    server answers with a non-200 status or when the request itself raises
    (connection error, timeout); either way one retry is consumed.

    Args:
        user_id: Token identifying the user, sent as ``token``.
        score: Grade to record, sent as ``grade``.

    Returns:
        True as soon as one attempt gets HTTP 200, False when all attempts
        failed.

    Raises:
        KeyError: If the ``ADMIN`` environment variable is not set.
    """
    max_retries = 10
    request_timeout = 10  # seconds; without it a stalled server blocks forever

    # Everything below is identical for every attempt, so build it once.
    url = "https://intern-api.imtaedu.com/api/subnets/1/grade"
    payload = {
        "token": user_id,
        "comment": "Good job!",
        "grade": score,
        # NOTE(review): both timestamps are fixed literals - confirm the API
        # does not expect the real submission/grading time.
        "submitted_at": "2021-01-01 00:00:00",
        "graded_at": "2021-01-01 00:00:00",
    }
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "X-Public-Api-Key": os.environ['ADMIN'],
    }

    for _ in range(max_retries):
        try:
            response = requests.post(
                url, json=payload, headers=headers, timeout=request_timeout
            )
        except requests.RequestException as exc:
            # A transport error is just another failed attempt.
            print(exc)
            continue
        if response.status_code == 200:
            return True
        print(response)
    return False
|
60 |
+
|
61 |
# Helper function to get the next text for translation
|
62 |
+
def get_next_en_text(user_id):
    """Return a randomly chosen entry of ``eng_texts``.

    ``user_id`` is accepted but not used for the selection.
    """
    return random.choice(eng_texts)
|
65 |
|
66 |
+
def get_next_package(user_id):
    """Pick the next stored package to hand out.

    ``packages[0]`` is a placeholder and is never handed out as real work;
    the scan starts at index 1. The first package whose second-to-last
    counter is still positive is returned, and that counter is decremented
    so the package is handed out a limited number of times. Packages whose
    two trailing counters are both 0 are counted as finished; once ``num``
    finished packages have been seen, ``save_to_translated_echo()`` is
    called and the placeholder is returned.

    Args:
        user_id: Currently unused (a check that would skip the caller's own
            packages is disabled).

    Returns:
        The selected package list, ``packages[0]`` when nothing is available,
        or None when ``packages`` is empty.
    """
    if not packages:
        return None

    finished = 0
    for candidate in packages[1:]:
        if finished >= num:
            save_to_translated_echo()
            return packages[0]
        if candidate[-2] > 0:
            # Decrement the counter of the package being handed out. The
            # previous code decremented the placeholder's infinite counter,
            # so no package could ever run out.
            candidate[-2] -= 1
            return candidate
        if candidate[-2] == 0 and candidate[-1] == 0:
            finished += 1
    return packages[0]
|
82 |
+
|
83 |
# Function to handle translation submission
|
84 |
+
def submit_translation(user_id, package, vi_translation, en_text, en_translation, vi_text):
    """Validate, score and record one submission.

    A submission carries the user's Vietnamese translation of ``en_text``
    and the user's English back-translation for the package held in
    ``package[0]`` (a list ``[user_id, source, target, score, ..., ...]``).

    Args:
        user_id: Token of the submitting user.
        package: One-element list whose item is the package under review.
        vi_translation: Vietnamese translation of ``en_text``; required.
        en_text: English source text that was translated.
        en_translation: English translation for the package; may be "".
        vi_text: Unused.

    Raises:
        ValueError: If ``vi_translation`` is empty, or a translation is not
            detected as the expected language.
        RuntimeError: If a score could not be reported via ``send_score``.
    """
    # Explicit raises instead of `assert`: asserts vanish under `python -O`.
    if vi_translation == "":
        raise ValueError("Vietnamese translation must not be empty")
    if detect(vi_translation) != "vi":
        gr.Warning("Bản dịch không phải tiếng Việt", duration=5)
        raise ValueError("translation was not detected as Vietnamese")

    if en_translation != "":
        # Detect once and reuse the result for both the check and the log.
        en_language = detect(en_translation)
        if en_language != "en":
            print(en_translation, en_language)
            gr.Warning("Bản dịch không phải tiếng Anh", duration=5)
            raise ValueError("translation was not detected as English")

    # NOTE(review): gg_score and miner_score are not defined in the visible
    # part of this file - confirm they are provided elsewhere.
    first_score = gg_score(en_text, vi_translation, target="vi")

    reviewed = package[0]
    second_score = miner_score(reviewed[1], en_translation)
    ref_score = gg_score(reviewed[2], en_translation, target="en")
    # Relative disagreement between the two scores; the 0.1 floor avoids a
    # division by (nearly) zero.
    trust_score = 1 - abs(second_score - ref_score) / max((second_score + ref_score) / 2, 0.1)

    own_score = first_score * trust_score * 0.5
    review_score = second_score * trust_score * 0.05

    # Store the new translation as a package of its own and credit the
    # package that was reviewed.
    packages.append([user_id, en_text, vi_translation, own_score, 10, 10])
    reviewed[3] += review_score
    reviewed[-1] -= 1

    if not send_score(user_id, own_score):
        raise RuntimeError("could not report the submitter's score")
    # user_id 0 marks the placeholder package, which has no owner to credit.
    if reviewed[0] != 0 and not send_score(reviewed[0], review_score):
        raise RuntimeError("could not report the package owner's score")
|
110 |
|
111 |
+
# Function to save completed translations to 'translated_echo'
|
112 |
+
def save_to_translated_echo():
    """Append the oldest ``num`` packages to the hub dataset and drop them.

    The existing ``vietdata/translated_echo`` train split is loaded, the
    first four fields (user_id, source, target, score) of the packages at
    indexes 1..num are appended, the result is pushed back to the hub and
    those packages are removed from ``packages``. The placeholder at index 0
    is neither saved nor removed.
    """
    columns = ["user_id", "source", "target", "score"]
    try:
        old_dataset = load_dataset("vietdata/translated_echo", split="train")
        old_dataset = old_dataset.to_pandas()
    except Exception:
        # Best effort: start from an empty table when the split cannot be
        # loaded.
        # NOTE(review): this also fires on transient errors, in which case
        # the push below replaces the stored split with only the new rows.
        old_dataset = pd.DataFrame([], columns=columns)

    # Skip the placeholder at index 0 so the saved rows are exactly the ones
    # removed at the end of this function.
    batch = packages[1:num + 1]
    new_dataset = pd.DataFrame([record[:4] for record in batch], columns=columns)
    # ignore_index keeps a plain RangeIndex, so Dataset.from_pandas does not
    # store the index as an extra column.
    new_dataset = pd.concat([old_dataset, new_dataset], ignore_index=True)

    # Publish the merged table to the Hugging Face hub.
    translated_dataset = Dataset.from_pandas(new_dataset)
    translated_dataset.push_to_hub("vietdata/translated_echo", split="train")

    # Release the large temporaries before continuing.
    del new_dataset
    del old_dataset
    del translated_dataset
    import gc
    gc.collect()

    # One slice deletion; unlike repeated pop(1) it cannot raise when fewer
    # than `num` packages are stored.
    del packages[1:num + 1]
|
132 |
|
|
|
|
|
133 |
|
134 |
# Initial value shown in the read-only source-text boxes of the UI.
english_text = None

# Per-user session entries keyed by username.
user_sessions = {}
|
139 |
|
140 |
+
def login(username, state, package):
    """Authenticate ``username`` and, on success, load the first job.

    Session state is only touched after authentication succeeds, so a failed
    login neither records the username nor takes a package.

    Args:
        username: Token entered by the user.
        state: One-element list; ``state[0]`` receives the username.
        package: One-element list; ``package[0]`` receives the package
            returned by ``get_next_package``.

    Returns:
        A 5-tuple: status message, update for the login section, update for
        the translation section, English text to translate, and the
        translation text of the selected package.
    """
    if not authenticate(username):
        return "Invalid username or password.", gr.update(visible=True), gr.update(visible=False), "", ""

    state[0] = username
    package[0] = get_next_package(user_id=username)
    return (
        f"Welcome, {username}!",
        gr.update(visible=False),
        gr.update(visible=True),
        get_next_en_text(username),
        package[0][2],
    )
|
150 |
|
151 |
def logout(username):
|
152 |
# Log out user and reset session
|
|
|
154 |
del user_sessions[username]
|
155 |
return "Logged out. Please log in again.", gr.update(visible=True), gr.update(visible=False)
|
156 |
|
157 |
+
def press_submit_translation( state, package, vi_translation, en_input, en_translation, vi_input):
    """Handle the Submit button: store the submission, then load a new job.

    Args:
        state: One-element list holding the logged-in username.
        package: One-element list holding the package under review; it is
            replaced with the next package after a successful submission.
        vi_translation: Vietnamese translation typed by the user.
        en_input: English source text currently displayed.
        en_translation: English translation typed by the user.
        vi_input: Vietnamese source text currently displayed.

    Returns:
        A 5-tuple: status message, English text to show, Vietnamese text to
        show, content of the Vietnamese-translation box, content of the
        English-translation box.
    """
    try:
        submit_translation(state[0], package, vi_translation, en_input, en_translation, vi_input)
        gr.Info("Submitted Succesfully")
    except Exception:
        import traceback
        # The traceback already ends with the exception message.
        print(traceback.format_exc())
        # Keep what the user typed so a retry does not require retyping.
        return "Error please try submit again!", en_input, vi_input, vi_translation, en_translation

    try:
        package[0] = get_next_package(user_id=state[0])
        return "Submitted Succesfully", get_next_en_text(state[0]), package[0][2], "", ""
    except Exception:
        # The submission itself went through, so the input boxes are cleared.
        return "Failed to load new job, please reload page!", en_input, vi_input, "", ""
|
174 |
|
175 |
# Define the Gradio interface
|
176 |
with gr.Blocks() as demo:
|
177 |
state = gr.State([None])
|
178 |
+
package = gr.State([None])
|
179 |
# Login section
|
180 |
with gr.Column(visible=True) as login_section:
|
181 |
username_input = gr.Textbox(placeholder="Enter your token", label="Token ID")
|
|
|
184 |
|
185 |
# Translation section (initially hidden)
|
186 |
with gr.Column(visible=False) as translation_section:
|
187 |
+
with gr.Column() as en2vi:
|
188 |
+
gr.Markdown("### Dịch từ tiếng Anh sang tiếng Việt")
|
189 |
+
en_input = gr.Textbox(value=english_text, label="Văn bản tiếng Anh", interactive=False)
|
190 |
+
vi_translation_input = gr.Textbox(placeholder="Nhập bản dịch", label="Nhập bản dịch tiếng Việt")
|
191 |
+
|
192 |
+
with gr.Column() as en2vi:
|
193 |
+
gr.Markdown("### Dịch từ tiếng Việt sang tiếng Anh")
|
194 |
+
vi_input = gr.Textbox(value=english_text, label="Văn bản tiếng Việt", interactive=False)
|
195 |
+
en_translation_input = gr.Textbox(placeholder="Nhập bản dịch", label="Nhập bản dịch tiếng Anh")
|
196 |
+
|
197 |
+
# gr.Markdown("### Đây là văn bản máy dịch hay người dịch (kiểm tra độ tự nhiên của văn bản)")
|
198 |
+
# with gr.Row():
|
199 |
+
# eval_document = gr.Textbox(label="Văn bản", placeholder="Văn bản cần đánh giá", interactive=False)
|
200 |
+
# choice = gr.Radio(["Human-Written", "Machine-Translated"], label="How would you classify this response?")
|
201 |
+
|
202 |
+
submit_button = gr.Button("Submit")
|
203 |
translation_output = gr.Textbox(label="Submission Status", interactive=False)
|
204 |
logout_button = gr.Button("Logout")
|
205 |
|
206 |
# Button functions
|
207 |
login_button.click(
|
208 |
+
login, inputs=[username_input, state, package], outputs=[login_output, login_section, translation_section, en_input, vi_input]
|
209 |
)
|
210 |
submit_button.click(
|
211 |
+
press_submit_translation, inputs=[state, package, vi_translation_input, en_input, en_translation_input, vi_input], outputs=[translation_output, en_input, vi_input, vi_translation_input, en_translation_input]
|
212 |
)
|
213 |
logout_button.click(
|
214 |
logout, inputs=[username_input], outputs=[login_output, login_section, translation_section]
|