# Hugging Face Space (status: Sleeping)
# app.py
import random | |
import csv | |
import gradio as gr | |
import pandas as pd | |
import tempfile | |
# Word lists
# Vocabulary pools sampled with random.choice() when a sentence is built.

# Sentence subjects/objects; each one also appears as a key in noun_verb_map.
nouns = [
    "dog", "cat", "child", "teacher", "artist", "bird",
    "river", "mountain", "book", "city", "car", "tree",
    "flower", "student", "computer", "phone", "house", "garden",
    "song", "idea", "scientist", "engineer", "doctor", "chef",
    "musician", "athlete", "writer", "poet", "farmer", "pilot",
]

adjectives = [
    "quick", "lazy", "beautiful", "tall", "short", "happy",
    "sad", "bright", "dark", "colorful", "quiet", "loud",
    "new", "old", "young", "ancient", "modern", "cold",
    "warm", "soft", "hard", "heavy", "light", "calm",
    "stormy", "fresh", "strong", "weak", "brave",
]

# Generic verbs: the fallback pool for a noun that has no noun_verb_map entry.
verbs = [
    "run", "jump", "paint", "read", "fly", "write",
    "sing", "build", "create", "discover", "learn", "teach",
    "drive", "grow", "think", "talk", "listen", "play",
    "see", "walk", "swim", "code", "design", "cook",
    "dance", "draw", "study", "explore", "invent", "research",
]

adverbs = [
    "quickly", "slowly", "gracefully", "happily", "sadly", "quietly",
    "loudly", "brightly", "softly", "carefully", "eagerly", "angrily",
    "easily", "hardly", "rarely", "often", "never", "always",
    "sometimes", "soon", "daily", "patiently", "politely", "proudly",
    "silently", "warmly", "well", "badly", "closely", "deeply",
]

prepositions = [
    "in", "on", "over", "under", "beside", "with",
    "without", "near", "between", "through", "against", "among",
    "around", "before", "after", "inside", "outside", "above",
    "below", "across", "behind", "beyond", "during", "for",
    "from", "into", "like", "off", "toward",
]

articles = ["the", "a", "an"]

conjunctions = [
    "and", "but", "so", "because", "when",
    "while", "although", "if", "unless", "since",
]
# Semantic rules: for each noun, the base-form verbs that make sense with it
# as the subject. Lookups use .get(), so a noun without an entry falls back
# to the generic verb list.
noun_verb_map = {
    "dog": ["run", "jump", "bark", "play", "walk"],
    "cat": ["meow", "sleep", "jump", "play", "purr"],
    "child": ["play", "learn", "read", "laugh", "grow"],
    "teacher": ["teach", "explain", "guide", "help", "learn"],
    "artist": ["paint", "draw", "create", "design", "imagine"],
    "bird": ["fly", "sing", "chirp", "nest", "soar"],
    "river": ["flow", "run", "wind", "bend", "swell"],
    "mountain": ["stand", "tower", "rise", "loom", "shadow"],
    "book": ["tell", "describe", "illustrate", "explain", "reveal"],
    "city": ["grow", "expand", "develop", "bustle", "sleep"],
    "car": ["drive", "speed", "stop", "park", "honk"],
    "tree": ["grow", "sway", "stand", "shed", "bloom"],
    "flower": ["bloom", "grow", "wilt", "open", "close"],
    "student": ["study", "learn", "read", "write", "graduate"],
    "computer": ["compute", "process", "run", "crash", "boot"],
    "phone": ["ring", "vibrate", "charge", "die", "connect"],
    "house": ["stand", "shelter", "protect", "age", "burn"],
    "garden": ["grow", "bloom", "flourish", "wilt", "produce"],
    "song": ["play", "sound", "echo", "resonate", "end"],
    "idea": ["form", "grow", "develop", "emerge", "inspire"],
    "scientist": ["research", "discover", "experiment", "study", "invent"],
    "engineer": ["design", "build", "develop", "test", "solve"],
    "doctor": ["heal", "diagnose", "treat", "operate", "care"],
    "chef": ["cook", "prepare", "taste", "create", "serve"],
    "musician": ["play", "compose", "perform", "sing", "record"],
    "athlete": ["run", "train", "compete", "win", "lose"],
    "writer": ["write", "create", "imagine", "edit", "publish"],
    "poet": ["write", "compose", "imagine", "express", "rhyme"],
    "farmer": ["grow", "plant", "harvest", "plow", "raise"],
    # "take off" is the only multi-word verb in the table.
    "pilot": ["fly", "navigate", "land", "take off", "command"],
}
# Sentence templates, filled in with str.format().
# Placeholders: {Article}/{article} (capitalized / lowercase article),
# {adjective}, {noun}/{noun2}, {Noun} (capitalized noun), {Noun_plural},
# {verb}/{verb2} (base forms; a literal "s" after the placeholder marks the
# third-person singular), {adverb}, {preposition}, {conjunction}.
# verb is chosen to suit noun and verb2 to suit noun2.
templates = [
    "{Article} {adjective} {noun} {adverb} {verb}s {preposition} {article} {adjective} {noun2}.",
    # NOTE(review): here and in the fifth template {verb2}s has no subject of
    # its own, so it reads as an action of {noun} although verb2 was picked
    # for noun2 - confirm whether that is intended.
    "{Article} {noun} {verb}s {preposition} {article} {noun2} {conjunction} {verb2}s {adverb}.",
    "{Noun_plural} {adverb} {verb} {preposition} {noun2}.",
    # The second clause is about noun2: verb2 comes from noun2's verb list,
    # so pairing it with {noun} produced mismatches such as "a dog meows".
    "{Noun} {verb}s {preposition} {article} {noun2} {conjunction} {article} {noun2} {verb2}s.",
    "{Article} {adjective} {noun} {verb}s {preposition} {noun2} {conjunction} {adverb} {verb2}s.",
    "{Noun} {verb}s {article} {noun2} {preposition} {noun}.",
]
_VOWELS = frozenset("aeiou")
_ES_ENDINGS = ("s", "x", "z", "ch", "sh")
_IRREGULAR_PLURALS = {"child": "children"}


def _add_s(word):
    """Attach the regular -s / -es / -ies ending to *word*."""
    if word.endswith(_ES_ENDINGS):
        return word + "es"
    if len(word) > 1 and word.endswith("y") and word[-2] not in _VOWELS:
        return word[:-1] + "ies"
    return word + "s"


def _third_person(verb):
    """Return the third-person singular of a base verb ("fly" -> "flies")."""
    # Phrasal verbs ("take off") inflect their first word only.
    head, space, rest = verb.partition(" ")
    if len(head) > 1 and head.endswith("o") and head[-2] not in _VOWELS:
        head += "es"  # "echo" -> "echoes"
    else:
        head = _add_s(head)
    return head + space + rest


def _pluralize(noun):
    """Return the plural of *noun*, consulting the irregular table first."""
    return _IRREGULAR_PLURALS.get(noun) or _add_s(noun)


def _fix_indefinite_articles(sentence):
    """Make every "a"/"an" in *sentence* agree with the word that follows it."""
    words = sentence.split(" ")
    for index, following in enumerate(words[1:]):
        current = words[index]
        if current.lower() not in ("a", "an"):
            continue
        # Spelling-based heuristic: it is right for the bundled vocabulary but
        # not for sound-based exceptions such as "hour" or "university".
        article = "an" if following[:1].lower() in _VOWELS else "a"
        words[index] = article.capitalize() if current[:1].isupper() else article
    return " ".join(words)


def generate_sentence():
    """Build one random sentence from a template and the word lists.

    The first noun gets a verb from its ``noun_verb_map`` entry (falling back
    to the generic ``verbs`` list); a second, different noun gets a verb the
    same way. The chosen template is then filled in with those words plus a
    random article, adjective, adverb, preposition and conjunction.

    Compared with plain template substitution, this also:

    * inflects third-person verbs properly ("teaches", "flies", "takes off")
      instead of appending a bare "s";
    * pluralizes nouns properly ("Children", "Cities");
    * makes "a"/"an" agree with the following word.

    Returns:
        str: The sentence, starting with an uppercase letter and ending
        with a period.
    """
    template = random.choice(templates)

    noun = random.choice(nouns)
    verb = random.choice(noun_verb_map.get(noun, verbs))

    # Pick the second noun from the remaining ones instead of retrying until
    # it differs: a retry loop never ends when the vocabulary has one noun.
    other_nouns = [candidate for candidate in nouns if candidate != noun] or nouns
    noun2 = random.choice(other_nouns)
    verb2 = random.choice(noun_verb_map.get(noun2, verbs))

    # Templates spell the third person as "{verb}s". Route those spots
    # through dedicated placeholders so the inflected form is used, while
    # bare "{verb}" (plural subject) keeps the base form.
    template = template.replace("{verb}s", "{verb_3sg}")
    template = template.replace("{verb2}s", "{verb2_3sg}")

    sentence = template.format(
        Article=random.choice(articles).capitalize(),
        article=random.choice(articles),
        adjective=random.choice(adjectives),
        noun=noun,
        noun2=noun2,
        Noun=noun.capitalize(),
        Noun_plural=_pluralize(noun).capitalize(),
        verb=verb,
        verb2=verb2,
        verb_3sg=_third_person(verb),
        verb2_3sg=_third_person(verb2),
        adverb=random.choice(adverbs),
        preposition=random.choice(prepositions),
        conjunction=random.choice(conjunctions),
    )
    sentence = _fix_indefinite_articles(sentence)

    # Uppercase only the first character; str.capitalize() would also
    # lowercase everything after it.
    sentence = sentence[:1].upper() + sentence[1:]
    if not sentence.endswith("."):
        sentence += "."
    return sentence
def generate_sentences(num_sentences):
    """Generate sentences and save them to a temporary CSV file.

    Args:
        num_sentences: How many sentences to generate. Passed through
            ``int()``, so floats are truncated toward zero; zero or a
            negative value produces a CSV that contains only the header.

    Returns:
        str: Path of the CSV file, which has a single ``sentence`` column.
        The file is created with ``delete=False`` and is not removed by
        this function.
    """
    count = int(num_sentences)
    sentences = [generate_sentence() for _ in range(count)]
    df = pd.DataFrame(sentences, columns=["sentence"])

    # Reserve a unique path, then close the handle before pandas writes to
    # it. Leaving the handle open leaks a file descriptor per call, and an
    # open NamedTemporaryFile cannot be reopened by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        csv_path = temp_file.name
    df.to_csv(csv_path, index=False)
    return csv_path
def generate_and_download(num_sentences):
    """Create the sentence CSV and return its file path.

    Args:
        num_sentences: Number of sentences to put in the CSV.

    Returns:
        The path returned by ``generate_sentences``.
    """
    return generate_sentences(num_sentences)
# Gradio interface: an intro text, a numeric input, a file output and a
# button that wires the two together.
with gr.Blocks() as demo:
    # NOTE(review): the text is kept flush-left as it appears in the visible
    # source; any original indentation inside the string was lost.
    gr.Markdown(
        """
# Sentence Dataset Generator with Semantic Rules
Enter the number of sentences you want to generate, and download a CSV file containing the sentences.
This generator uses semantic rules to create more coherent and meaningful sentences.
"""
    )

    num_sentences = gr.Number(value=1000, precision=0, label="Number of Sentences")
    output = gr.File(label="Download CSV")
    generate_button = gr.Button("Generate Sentences")

    # On click, run the generator with the entered count and pass the
    # resulting CSV path to the file component.
    generate_button.click(generate_and_download, inputs=num_sentences, outputs=output)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()