vietdata's picture
first update
9700fe3
raw
history blame
4.72 kB
import random
from collections import defaultdict

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset
# Load the source dataset: the "train" split of vietdata/eng_echo. Its
# "query" column supplies the texts that get_next_text() offers for translation.
source_dataset = load_dataset("vietdata/eng_echo", split="train")
source_texts = source_dataset["query"]
# Initialize variables
# translations: source text -> list of (user_id, translation) tuples collected so far.
translations = defaultdict(list)
# processed_data: rows of {"query": ..., "translations": [...]} for texts that
# reached 10 translations and were moved out of `translations` by a save.
processed_data = []
def get_next_text(user_id):
    """Pick a random source text that still needs translations.

    A text is eligible while it has fewer than 10 collected translations
    and has not already been moved into ``processed_data`` by a save.

    Args:
        user_id: Identifier of the requesting user. Currently unused; kept
            so callers can keep passing it.

    Returns:
        A randomly chosen eligible text, or the message
        "All texts are fully translated." when none remain.
    """
    # Saved texts are removed from `translations`, so their count alone would
    # make them look untouched and they would be offered again.
    finished = {row["query"] for row in processed_data}
    # .get() avoids inserting an empty entry for every source text on each
    # call and also works if `translations` is a plain dict.
    eligible_texts = [
        text
        for text in source_texts
        if text not in finished and len(translations.get(text, ())) < 10
    ]
    if not eligible_texts:
        return "All texts are fully translated."
    # Select a random eligible text for translation
    return random.choice(eligible_texts)
def submit_translation(user_id, original_text, translation):
    """Record one translation for a source text.

    NOTE: a later definition in this file reuses the name
    ``submit_translation`` for the UI click handler.

    Args:
        user_id: Identifier of the submitting user.
        original_text: The source text that was translated.
        translation: The submitted translation.

    Returns:
        A status message: success, or a notice that the text already has
        10 translations and nothing was stored.
    """
    collected = translations[original_text]
    # Guard: a text never holds more than 10 translations.
    if len(collected) >= 10:
        return "This text already has 10 translations. Please request a new text."

    collected.append((user_id, translation))

    # Trigger a save once 100 texts have reached exactly 10 translations.
    completed_count = sum(
        1 for versions in translations.values() if len(versions) == 10
    )
    if completed_count >= 100:
        save_to_translated_echo()
    return "Translation submitted successfully."
def save_to_translated_echo():
    """Move fully translated texts into ``processed_data`` and upload them.

    Every text with exactly 10 translations is turned into a row
    ``{"query": text, "translations": [...]}`` (translation strings only,
    user ids dropped), appended to ``processed_data`` and removed from
    ``translations``. All of ``processed_data`` is then converted to a
    Hugging Face ``Dataset`` and pushed to ``vietdata/translated_echo``
    as the "train" split.

    Raises:
        Whatever ``Dataset.push_to_hub`` raises on upload failure. The rows
        remain in ``processed_data``, so a later save uploads them again.
    """
    # Gather translations with exactly 10 versions
    completed_translations = [
        {"query": text, "translations": [entry[1] for entry in versions]}
        for text, versions in translations.items()
        if len(versions) == 10
    ]
    # Append to processed data
    processed_data.extend(completed_translations)
    # Drop the completed texts in place. Rebinding `translations` to a plain
    # dict would lose the defaultdict behaviour that other functions rely on
    # when they index texts with no entry yet.
    for row in completed_translations:
        del translations[row["query"]]
    # Convert to Hugging Face dataset format
    translated_dataset = Dataset.from_pandas(pd.DataFrame(processed_data))
    # This is a real network upload of the full accumulated dataset.
    # NOTE(review): presumably this replaces the existing "train" split on
    # the Hub rather than appending to it -- confirm against the datasets docs.
    translated_dataset.push_to_hub("vietdata/translated_echo", split="train")
import gradio as gr
# Simulated user data for demonstration.
# NOTE(review): not referenced anywhere else in the visible source.
user_data = {"hello": "hello"}
# Sample English text to translate; used as the initial value of the
# "English Text" box before login fills it with a text from the dataset.
english_text = "Translate this text to Vietnamese."
# User session dictionary meant to store logged-in status. logout() removes a
# user's entry from it; nothing in the visible source adds entries.
user_sessions = {}
def login(username, state):
    """Log a user in with their token and reveal the translation section.

    Args:
        username: Token typed into the login box; used as the user id.
        state: Per-session list whose first slot receives the user id.

    Returns:
        A 4-tuple matching the click handler's outputs: status message,
        update for the login section, update for the translation section,
        and the text to show in the English-text box.
    """
    # Reject a missing or blank token so translations are never attributed
    # to an empty user id. No other credential check exists yet.
    if not username or not username.strip():
        return (
            "Invalid username or password.",
            gr.update(visible=True),
            gr.update(visible=False),
            "",
        )

    state[0] = username
    return (
        f"Welcome, {username}!",
        gr.update(visible=False),
        gr.update(visible=True),
        get_next_text(username),
    )
def logout(username):
    """Log a user out and switch the UI back to the login section.

    Args:
        username: Current content of the token box; its entry, if any, is
            removed from ``user_sessions``.

    Returns:
        A 3-tuple: status message, update showing the login section, and
        update hiding the translation section.
    """
    # Forget the session if one exists; an unknown user is not an error.
    user_sessions.pop(username, None)
    show_login = gr.update(visible=True)
    hide_translation = gr.update(visible=False)
    return "Logged out. Please log in again.", show_login, hide_translation
# The UI handler below reuses the name ``submit_translation``. Capture the
# storage-level function defined earlier in this file first; otherwise the
# handler's inner call resolves to the handler itself and recurses.
_store_translation = submit_translation


def submit_translation(translation, state, job_input):
    """Gradio click handler: store the user's translation and serve a new text.

    Args:
        translation: Text typed into the translation box.
        state: Per-session list whose first slot holds the user id.
        job_input: The source text currently shown to the user.

    Returns:
        A 2-tuple: status message and the next text for the English-text
        box. On any failure the message is an error and the current text
        is kept so the user can retry.
    """
    try:
        _store_translation(state[0], job_input, translation)
        next_text = get_next_text(state[0])
    except Exception as e:
        # UI boundary: report the problem instead of crashing the handler.
        print(e)
        return "Error please try submit again!", job_input
    # Save succeeded: confirm what was submitted and move on.
    return f"""Translation of "{job_input}" submitted: {translation}""", next_text
# Define the Gradio interface
# NOTE(review): the source's indentation was lost; the nesting below follows
# the section comments -- confirm which column `login_output` belongs to.
with gr.Blocks() as demo:
    # Per-session holder; login() stores the user id in slot 0.
    state = gr.State([None])

    # Login section
    with gr.Column(visible=True) as login_section:
        username_input = gr.Textbox(
            placeholder="Enter your token",
            label="Token ID",
        )
        login_button = gr.Button("Login")
        login_output = gr.Textbox(label="Login Status", interactive=False)

    # Translation section (initially hidden)
    with gr.Column(visible=False) as translation_section:
        job_input = gr.Textbox(
            value=english_text,
            label="English Text",
            interactive=False,
        )
        translation_input = gr.Textbox(
            placeholder="Enter your translation here",
            label="Your Translation",
        )
        submit_button = gr.Button("Submit Translation")
        translation_output = gr.Textbox(
            label="Submission Status",
            interactive=False,
        )
        logout_button = gr.Button("Logout")

    # Button functions
    login_button.click(
        fn=login,
        inputs=[username_input, state],
        outputs=[login_output, login_section, translation_section, job_input],
    )
    submit_button.click(
        fn=submit_translation,
        inputs=[translation_input, state, job_input],
        outputs=[translation_output, job_input],
    )
    logout_button.click(
        fn=logout,
        inputs=[username_input],
        outputs=[login_output, login_section, translation_section],
    )

demo.launch(debug=True)