# app.py
"""Gradio app that writes randomly generated, template-based sentences to CSV.

A sentence is built by picking a template and filling its placeholders with
words drawn from the vocabulary lists below. Verbs are chosen from
``noun_verb_map`` so that the action fits the subject noun.
"""

import csv
import random
import tempfile

import gradio as gr
import pandas as pd

# Word lists
nouns = [
    "dog", "cat", "child", "teacher", "artist", "bird", "river", "mountain",
    "book", "city", "car", "tree", "flower", "student", "computer", "phone",
    "house", "garden", "song", "idea", "scientist", "engineer", "doctor",
    "chef", "musician", "athlete", "writer", "poet", "farmer", "pilot",
]

adjectives = [
    "quick", "lazy", "beautiful", "tall", "short", "happy", "sad", "bright",
    "dark", "colorful", "quiet", "loud", "new", "old", "young", "ancient",
    "modern", "cold", "warm", "soft", "hard", "heavy", "light", "calm",
    "stormy", "fresh", "strong", "weak", "brave",
]

verbs = [
    "run", "jump", "paint", "read", "fly", "write", "sing", "build",
    "create", "discover", "learn", "teach", "drive", "grow", "think", "talk",
    "listen", "play", "see", "walk", "swim", "code", "design", "cook",
    "dance", "draw", "study", "explore", "invent", "research",
]

adverbs = [
    "quickly", "slowly", "gracefully", "happily", "sadly", "quietly",
    "loudly", "brightly", "softly", "carefully", "eagerly", "angrily",
    "easily", "hardly", "rarely", "often", "never", "always", "sometimes",
    "soon", "daily", "patiently", "politely", "proudly", "silently",
    "warmly", "well", "badly", "closely", "deeply",
]

prepositions = [
    "in", "on", "over", "under", "beside", "with", "without", "near",
    "between", "through", "against", "among", "around", "before", "after",
    "inside", "outside", "above", "below", "across", "behind", "beyond",
    "during", "for", "from", "into", "like", "off", "toward",
]

articles = ["the", "a", "an"]

conjunctions = [
    "and", "but", "so", "because", "when", "while", "although", "if",
    "unless", "since",
]

# Semantic rules: mapping nouns to appropriate verbs
noun_verb_map = {
    "dog": ["run", "jump", "bark", "play", "walk"],
    "cat": ["meow", "sleep", "jump", "play", "purr"],
    "child": ["play", "learn", "read", "laugh", "grow"],
    "teacher": ["teach", "explain", "guide", "help", "learn"],
    "artist": ["paint", "draw", "create", "design", "imagine"],
    "bird": ["fly", "sing", "chirp", "nest", "soar"],
    "river": ["flow", "run", "wind", "bend", "swell"],
    "mountain": ["stand", "tower", "rise", "loom", "shadow"],
    "book": ["tell", "describe", "illustrate", "explain", "reveal"],
    "city": ["grow", "expand", "develop", "bustle", "sleep"],
    "car": ["drive", "speed", "stop", "park", "honk"],
    "tree": ["grow", "sway", "stand", "shed", "bloom"],
    "flower": ["bloom", "grow", "wilt", "open", "close"],
    "student": ["study", "learn", "read", "write", "graduate"],
    "computer": ["compute", "process", "run", "crash", "boot"],
    "phone": ["ring", "vibrate", "charge", "die", "connect"],
    "house": ["stand", "shelter", "protect", "age", "burn"],
    "garden": ["grow", "bloom", "flourish", "wilt", "produce"],
    "song": ["play", "sound", "echo", "resonate", "end"],
    "idea": ["form", "grow", "develop", "emerge", "inspire"],
    "scientist": ["research", "discover", "experiment", "study", "invent"],
    "engineer": ["design", "build", "develop", "test", "solve"],
    "doctor": ["heal", "diagnose", "treat", "operate", "care"],
    "chef": ["cook", "prepare", "taste", "create", "serve"],
    "musician": ["play", "compose", "perform", "sing", "record"],
    "athlete": ["run", "train", "compete", "win", "lose"],
    "writer": ["write", "create", "imagine", "edit", "publish"],
    "poet": ["write", "compose", "imagine", "express", "rhyme"],
    "farmer": ["grow", "plant", "harvest", "plow", "raise"],
    "pilot": ["fly", "navigate", "land", "take off", "command"],
}

# Sentence templates.
# {verb_3sg}/{verb2_3sg} are the third-person-singular forms of {verb}/{verb2};
# the bare {verb} is used only where the subject is plural.
templates = [
    "{Article} {adjective} {noun} {adverb} {verb_3sg} {preposition} {article} {adjective} {noun2}.",
    "{Article} {noun} {verb_3sg} {preposition} {article} {noun2} {conjunction} {verb2_3sg} {adverb}.",
    "{Noun_plural} {adverb} {verb} {preposition} {noun2}.",
    "{Noun} {verb_3sg} {preposition} {article} {noun2} {conjunction} {article} {noun} {verb2_3sg}.",
    "{Article} {adjective} {noun} {verb_3sg} {preposition} {noun2} {conjunction} {adverb} {verb2_3sg}.",
    "{Noun} {verb_3sg} {article} {noun2} {preposition} {noun}.",
]

_VOWELS = "aeiou"
_ES_ENDINGS = ("s", "sh", "ch", "x", "z")
_IRREGULAR_PLURALS = {"child": "children"}


def _add_s(word, *, o_takes_es=False):
    """Return *word* with the regular English -s / -es / -ies suffix."""
    if not word:
        return word
    if word.endswith(_ES_ENDINGS) or (o_takes_es and word.endswith("o")):
        return word + "es"
    # Consonant + "y" becomes "ies" ("study" -> "studies"); vowel + "y"
    # just takes "s" ("play" -> "plays").
    if word.endswith("y") and len(word) > 1 and word[-2] not in _VOWELS:
        return word[:-1] + "ies"
    return word + "s"


def _third_person(verb):
    """Return the third-person-singular present form of *verb*.

    Only the first word is inflected so that phrasal verbs come out as
    "takes off" rather than "take offs".
    """
    head, space, rest = verb.partition(" ")
    return _add_s(head, o_takes_es=True) + space + rest


def _pluralize(noun):
    """Return the plural of *noun*, consulting the irregular table first."""
    return _IRREGULAR_PLURALS.get(noun) or _add_s(noun)


def _fix_indefinite_articles(sentence):
    """Make every "a"/"an" in *sentence* agree with the word that follows it.

    The choice is made on the first letter of the next word, which is a
    spelling heuristic rather than a pronunciation check.
    """
    words = sentence.split(" ")
    for index, (word, following) in enumerate(zip(words, words[1:])):
        if word.lower() not in ("a", "an"):
            continue
        article = "an" if following[:1].lower() in _VOWELS else "a"
        words[index] = article.capitalize() if word[0].isupper() else article
    return " ".join(words)


def generate_sentence() -> str:
    """Build one random sentence from a randomly chosen template.

    Returns:
        The sentence, starting with an upper-case letter and ending with a
        period.
    """
    template = random.choice(templates)

    noun = random.choice(nouns)

    # Both verbs describe the subject noun: in every template that uses the
    # second verb, its grammatical subject is still ``noun``.
    subject_verbs = noun_verb_map.get(noun, verbs)
    verb = random.choice(subject_verbs)
    # Prefer a second verb that differs from the first; fall back to the full
    # list when the noun has only one verb.
    verb2 = random.choice([v for v in subject_verbs if v != verb] or subject_verbs)

    # Pick a different second noun when one exists; filtering avoids a retry
    # loop that could never finish on a one-word vocabulary.
    noun2 = random.choice([n for n in nouns if n != noun] or nouns)

    sentence = template.format(
        Article=random.choice(articles).capitalize(),
        article=random.choice(articles),
        adjective=random.choice(adjectives),
        noun=noun,
        noun2=noun2,
        Noun=noun.capitalize(),
        Noun_plural=_pluralize(noun).capitalize(),
        verb=verb,
        verb2=verb2,
        verb_3sg=_third_person(verb),
        verb2_3sg=_third_person(verb2),
        adverb=random.choice(adverbs),
        preposition=random.choice(prepositions),
        conjunction=random.choice(conjunctions),
    )

    # Articles are drawn at random, so "a"/"an" has to be corrected once the
    # following word is known.
    sentence = _fix_indefinite_articles(sentence)

    # Upper-case only the first character; str.capitalize() would also
    # lower-case everything after it.
    sentence = sentence[:1].upper() + sentence[1:]
    if not sentence.endswith("."):
        sentence += "."
    return sentence


def generate_sentences(num_sentences) -> str:
    """Generate sentences and write them to a temporary CSV file.

    Args:
        num_sentences: How many sentences to generate. Converted with
            ``int()``; ``None`` and negative values produce zero sentences.

    Returns:
        Path of the CSV file, which has a single ``sentence`` column. The file
        is created with ``delete=False`` and is not removed by this module.
    """
    count = max(0, int(num_sentences or 0))
    sentences = [generate_sentence() for _ in range(count)]
    df = pd.DataFrame(sentences, columns=["sentence"])

    # Reserve a unique path and close the handle before pandas opens the same
    # path for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        csv_path = temp_file.name
    df.to_csv(csv_path, index=False)
    return csv_path


def generate_and_download(num_sentences) -> str:
    """Click handler: generate the CSV and return its path for download."""
    csv_file = generate_sentences(num_sentences)
    return csv_file


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Sentence Dataset Generator with Semantic Rules
        Enter the number of sentences you want to generate, and download a CSV file containing the sentences.
        This generator uses semantic rules to create more coherent and meaningful sentences.
        """
    )
    num_sentences = gr.Number(label="Number of Sentences", value=1000, precision=0)
    output = gr.File(label="Download CSV")
    generate_button = gr.Button("Generate Sentences")
    generate_button.click(
        fn=generate_and_download,
        inputs=num_sentences,
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()