# Vishwas1's picture
# Create app.py
# 802d0ef verified
# app.py
import random
import csv
import gradio as gr
import pandas as pd
import tempfile
# Vocabulary pools, one per part of speech. Each pool is written as a block
# of whitespace-separated single words and split into a plain list.
nouns = """
    dog cat child teacher artist bird river mountain
    book city car tree flower student computer phone
    house garden song idea scientist engineer doctor
    chef musician athlete writer poet farmer pilot
""".split()

adjectives = """
    quick lazy beautiful tall short happy sad bright
    dark colorful quiet loud new old young ancient
    modern cold warm soft hard heavy light calm
    stormy fresh strong weak brave
""".split()

# Generic verbs; used as the fallback for a noun without its own verb list.
verbs = """
    run jump paint read fly write sing build
    create discover learn teach drive grow think
    talk listen play see walk swim code design
    cook dance draw study explore invent research
""".split()

adverbs = """
    quickly slowly gracefully happily sadly quietly loudly
    brightly softly carefully eagerly angrily easily hardly
    rarely often never always sometimes soon daily patiently
    politely proudly silently warmly well badly closely deeply
""".split()

prepositions = """
    in on over under beside with without near
    between through against among around before after
    inside outside above below across behind beyond
    during for from into like off toward
""".split()

articles = "the a an".split()

conjunctions = "and but so because when while although if unless since".split()
# Semantic pairing table: for each noun, the verbs that noun can plausibly
# perform as a subject. Entries are base forms; one of them ("take off") is a
# two-word phrasal verb.
noun_verb_map = {
    "dog":       ["run", "jump", "bark", "play", "walk"],
    "cat":       ["meow", "sleep", "jump", "play", "purr"],
    "child":     ["play", "learn", "read", "laugh", "grow"],
    "teacher":   ["teach", "explain", "guide", "help", "learn"],
    "artist":    ["paint", "draw", "create", "design", "imagine"],
    "bird":      ["fly", "sing", "chirp", "nest", "soar"],
    "river":     ["flow", "run", "wind", "bend", "swell"],
    "mountain":  ["stand", "tower", "rise", "loom", "shadow"],
    "book":      ["tell", "describe", "illustrate", "explain", "reveal"],
    "city":      ["grow", "expand", "develop", "bustle", "sleep"],
    "car":       ["drive", "speed", "stop", "park", "honk"],
    "tree":      ["grow", "sway", "stand", "shed", "bloom"],
    "flower":    ["bloom", "grow", "wilt", "open", "close"],
    "student":   ["study", "learn", "read", "write", "graduate"],
    "computer":  ["compute", "process", "run", "crash", "boot"],
    "phone":     ["ring", "vibrate", "charge", "die", "connect"],
    "house":     ["stand", "shelter", "protect", "age", "burn"],
    "garden":    ["grow", "bloom", "flourish", "wilt", "produce"],
    "song":      ["play", "sound", "echo", "resonate", "end"],
    "idea":      ["form", "grow", "develop", "emerge", "inspire"],
    "scientist": ["research", "discover", "experiment", "study", "invent"],
    "engineer":  ["design", "build", "develop", "test", "solve"],
    "doctor":    ["heal", "diagnose", "treat", "operate", "care"],
    "chef":      ["cook", "prepare", "taste", "create", "serve"],
    "musician":  ["play", "compose", "perform", "sing", "record"],
    "athlete":   ["run", "train", "compete", "win", "lose"],
    "writer":    ["write", "create", "imagine", "edit", "publish"],
    "poet":      ["write", "compose", "imagine", "express", "rhyme"],
    "farmer":    ["grow", "plant", "harvest", "plow", "raise"],
    "pilot":     ["fly", "navigate", "land", "take off", "command"],
}
# Sentence skeletons filled in with str.format(). Capitalised placeholders
# ({Article}, {Noun}, {Noun_plural}) are the sentence-initial variants; a
# literal "s" after {verb}/{verb2} marks a third-person singular verb.
templates = [
    # article + adjective + subject, adverb, verb, prepositional phrase
    "{Article} {adjective} {noun} {adverb} {verb}s {preposition} {article} {adjective} {noun2}.",
    # two clauses joined by a conjunction, second clause ends with an adverb
    "{Article} {noun} {verb}s {preposition} {article} {noun2} {conjunction} {verb2}s {adverb}.",
    # plural subject, so the verb stays in its base form
    "{Noun_plural} {adverb} {verb} {preposition} {noun2}.",
    # bare subject; the subject is repeated in the second clause
    "{Noun} {verb}s {preposition} {article} {noun2} {conjunction} {article} {noun} {verb2}s.",
    # described subject, second clause led by an adverb
    "{Article} {adjective} {noun} {verb}s {preposition} {noun2} {conjunction} {adverb} {verb2}s.",
    # subject, verb, object, then a prepositional phrase reusing the subject
    "{Noun} {verb}s {article} {noun2} {preposition} {noun}.",
]
_VOWELS = frozenset("aeiou")

# Nouns whose plural is not formed by a suffix rule.
_IRREGULAR_PLURALS = {"child": "children"}


def _third_person(verb):
    """Return the third-person singular present form of ``verb``.

    Only the first word is inflected, so a phrasal verb keeps its particle
    in place ("take off" -> "takes off").
    """
    head, gap, particle = verb.partition(" ")
    if head.endswith(("s", "sh", "ch", "x", "z", "o")):
        head += "es"  # teach -> teaches, process -> processes, echo -> echoes
    elif len(head) > 1 and head.endswith("y") and head[-2] not in _VOWELS:
        head = head[:-1] + "ies"  # fly -> flies (but play -> plays)
    else:
        head += "s"
    return head + gap + particle


def _pluralize(noun):
    """Return the plural form of ``noun`` using basic English spelling rules."""
    if noun in _IRREGULAR_PLURALS:
        return _IRREGULAR_PLURALS[noun]
    if noun.endswith(("s", "sh", "ch", "x", "z")):
        return noun + "es"
    if len(noun) > 1 and noun.endswith("y") and noun[-2] not in _VOWELS:
        return noun[:-1] + "ies"  # city -> cities
    return noun + "s"


def _fix_indefinite_articles(sentence):
    """Make every "a"/"an" in ``sentence`` agree with the word that follows it."""
    words = sentence.split(" ")
    for i, word in enumerate(words[:-1]):
        if word.lower() in ("a", "an"):
            # Spelling-based heuristic: a vowel letter takes "an". That is
            # sufficient for the vocabulary in this file.
            article = "an" if words[i + 1][:1].lower() in _VOWELS else "a"
            words[i] = article.capitalize() if word[0].isupper() else article
    return " ".join(words)


def generate_sentence():
    """Build one random sentence from the module's templates and word lists.

    The subject noun is paired with verbs from ``noun_verb_map`` (falling
    back to the generic ``verbs`` list), verbs are inflected for a
    third-person singular subject, plural nouns are spelled properly, and
    "a"/"an" are matched to the following word.

    Returns:
        str: A sentence starting with a capital letter and ending in ".".
    """
    template = random.choice(templates)
    # The templates spell the third-person ending as a literal "s" after the
    # placeholder, which yields "teachs" or "take offs". Redirect those spots
    # to placeholders that receive a properly inflected form instead.
    template = template.replace("{verb}s", "{verb_s}").replace("{verb2}s", "{verb2_s}")

    noun = random.choice(nouns)
    # Pick the second noun from the remaining ones; unlike a retry loop this
    # cannot spin forever when the noun list has a single entry.
    other_nouns = [candidate for candidate in nouns if candidate != noun]
    noun2 = random.choice(other_nouns) if other_nouns else noun

    # In every template the second verb's subject is still `noun` (explicitly
    # or by elision), so both verbs come from the subject's verb list.
    subject_verbs = noun_verb_map.get(noun, verbs)
    verb = random.choice(subject_verbs)
    other_verbs = [candidate for candidate in subject_verbs if candidate != verb]
    verb2 = random.choice(other_verbs) if other_verbs else verb

    sentence = template.format(
        Article=random.choice(articles).capitalize(),
        article=random.choice(articles),
        adjective=random.choice(adjectives),
        noun=noun,
        noun2=noun2,
        Noun=noun.capitalize(),
        Noun_plural=_pluralize(noun).capitalize(),
        verb=verb,
        verb2=verb2,
        verb_s=_third_person(verb),
        verb2_s=_third_person(verb2),
        adverb=random.choice(adverbs),
        preposition=random.choice(prepositions),
        conjunction=random.choice(conjunctions),
    )

    sentence = _fix_indefinite_articles(sentence)
    # Upper-case only the first character; the rest is left as generated.
    sentence = sentence[:1].upper() + sentence[1:]
    if not sentence.endswith("."):
        sentence += "."
    return sentence
def generate_sentences(num_sentences):
    """Generate sentences and write them to a temporary CSV file.

    Args:
        num_sentences: How many sentences to produce. Converted with
            ``int()``; ``None`` (an emptied number field) and negative
            values are treated as zero.

    Returns:
        str: Path of the CSV file, which has a single ``sentence`` column.
        The file is created with ``delete=False``, so it stays on disk and
        the caller owns its cleanup.
    """
    count = max(int(num_sentences or 0), 0)
    sentences = [generate_sentence() for _ in range(count)]
    df = pd.DataFrame(sentences, columns=["sentence"])
    # Write through the handle and let the context manager close it, so no
    # descriptor is leaked. newline="" leaves line endings to pandas.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8"
    ) as temp_file:
        df.to_csv(temp_file, index=False)
    return temp_file.name
def generate_and_download(num_sentences):
    """Click handler: build the sentence CSV and return its file path."""
    return generate_sentences(num_sentences)
# Gradio UI: intro text, a sentence-count field, a download slot for the CSV,
# and a button that runs the generator.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Sentence Dataset Generator with Semantic Rules
Enter the number of sentences you want to generate, and download a CSV file containing the sentences.
This generator uses semantic rules to create more coherent and meaningful sentences.
"""
    )

    # precision=0 asks the number field for whole numbers.
    num_sentences = gr.Number(value=1000, precision=0, label="Number of Sentences")
    output = gr.File(label="Download CSV")
    generate_button = gr.Button("Generate Sentences")

    # On click, pass the requested count to the handler and show the
    # returned file path in the download slot.
    generate_button.click(
        generate_and_download,
        inputs=num_sentences,
        outputs=output,
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()