File size: 13,741 Bytes
ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 ea5d289 f032594 fd730e4 a7b7f1a fd730e4 a7b7f1a ea5d289 a7b7f1a fd730e4 f032594 ea5d289 a3a4f31 f4da2c5 943fd65 f4da2c5 d545206 f032594 ef814ac a7b7f1a ee75c88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from collections import Counter
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
import gradio as gr
import pandas as pd
# Download the NLTK data used by the tokenizer, POS tagger, stop-word filter
# and lemmatizer below. Recent NLTK releases look the tokenizer and tagger up
# under the newer "punkt_tab" / "averaged_perceptron_tagger_eng" names, while
# older releases use the original names, so both variants are requested.
for _resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'stopwords',
    'wordnet',
):
    nltk.download(_resource)

# English stop words, as a set for O(1) membership tests during filtering.
stop_words = set(stopwords.words('english'))
# Shared lemmatizer used to recover the base form of inflected verbs.
lemmatizer = WordNetLemmatizer()
# Vocabulary data for the word list. Each key is a lowercase word and each
# value is a 4-tuple:
#     (example sentence, comma-separated synonyms, Korean meaning 1, Korean meaning 2)
# NOTE(review): the Korean glosses were restored from mis-decoded (mojibake)
# text; a few are ambiguous in that form (e.g. verb endings) and should be
# confirmed against the original word list.
word_data_examples = {
    "village": ("The village was quiet at night.", "hamlet, community", "마을", "시골"),
    "adventure": ("They went on an exciting adventure in the forest.", "expedition, quest", "모험", "여행"),
    "map": ("We used a map to find the hidden treasure.", "chart, atlas", "지도", "약도"),
    "cave": ("They explored a dark cave in the mountains.", "cavern, grotto", "동굴", "굴"),
    "among": ("She found her book among the pile of papers.", "amidst, between", "가운데", "사이에"),
    "mountains": ("The mountains were covered with snow in winter.", "peaks, ranges", "산", "산맥"),
    "children": ("The children played games in the park.", "kids, youngsters", "아이들", "어린이"),
    "known": ("He was known for his kindness and bravery.", "recognized, famous", "알려진", "유명한"),
    "hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret", "숨겨진", "비밀의"),
    "local": ("The local market was full of fresh produce.", "regional, native", "지역의", "현지의"),
    "discovery": ("The discovery of the old map excited everyone.", "finding, revelation", "발견", "탐구"),
    "eagle": ("An eagle soared high above the valley.", "raptor, bird of prey", "독수리", "맹금"),
    "villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk", "마을 사람들", "주민들"),
    "legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore", "전설", "신화"),
    "tales": ("Grandma told us tales of her childhood.", "stories, narratives", "이야기", "동화"),
    "daring": ("His daring escape from the cave was legendary.", "bold, audacious", "대담한", "용감한"),
    "spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence", "정신", "혼"),
    "exploring": ("They spent the summer exploring the forest.", "investigating, discovering", "탐험하다", "탐구하다"),
    "old": ("The old castle was full of secrets.", "ancient, aged", "오래된", "낡은"),
    "lost": ("He felt lost without his best friend.", "missing, misplaced", "잃어버린", "길을 잃은"),
    "ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique", "고대의", "옛날의"),
    "inside": ("Inside the box was a beautiful necklace.", "within, interior", "안쪽", "내부"),
    "treasure": ("They dreamed of finding hidden treasure.", "riches, valuables", "보물", "귀중품"),
    "whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking", "속삭이는", "조용히 말하는"),
    "hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void", "빈", "구멍 난"),
    "decided": ("She decided to take the long way home.", "determined, resolved", "결정하다", "결심하다"),
    "journey": ("Their journey took them across the country.", "trip, voyage", "여행", "여정"),
    "together": ("They worked together to solve the mystery.", "jointly, collectively", "함께", "같이"),
    "way": ("She found a new way to solve the puzzle.", "method, manner", "방법", "방식"),
    "reached": ("They finally reached the top of the hill.", "arrived, attained", "도달하다", "도착하다"),
    "chest": ("The chest was filled with gold coins.", "trunk, box", "상자", "가슴"),
    "boulder": ("A large boulder blocked the path.", "rock, stone", "바위", "돌"),
    "artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities", "유물", "고대 유물"),
    "legends": ("The legends spoke of a hidden kingdom.", "myths, sagas", "전설", "신화"),
    "explore": ("They wanted to explore the old mansion.", "investigate, examine", "탐험하다", "조사하다"),
    "secret": ("She kept the secret hidden from everyone.", "confidential, hidden", "비밀", "숨겨진"),
    "small": ("The small kitten was very playful.", "tiny, little", "작은", "소형"),
    "mountain": ("The mountain was covered in thick forests.", "peak, hill", "산", "산맥"),
    "part": ("Each part of the puzzle was important.", "piece, segment", "부분", "조각"),
    "everyday": ("He wore his everyday clothes to the party.", "daily, routine", "일상적인", "매일의"),
    "life": ("Life in the village was peaceful.", "existence, being", "삶", "생명"),
    "nestled": ("The cabin was nestled in the woods.", "tucked, situated", "자리 잡다", "위치하다"),
    "towering": ("The towering trees made the forest dark and cool.", "lofty, soaring", "우뚝 솟은", "높은"),
    "peaks": ("The mountain peaks were covered in snow.", "summits, crests", "산봉우리", "정상"),
    "said": ("He said he would be back soon.", "stated, remarked", "말하다", "언급하다"),
    "protected": ("The ancient ruins were protected by law.", "guarded, sheltered", "보호된", "지켜진"),
    "massive": ("The massive ship docked at the port.", "enormous, huge", "거대한", "엄청난"),
    "supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly", "아마", "추정상"),
    "watched": ("They watched the movie together.", "observed, viewed", "보다", "관찰하다"),
    "perch": ("The bird found a perch on the windowsill.", "roost, rest", "횃대", "앉다"),
}
# Tokens dropped from both the word cloud and the word list: personal and
# possessive pronouns, plus the proper names that appear in the sample story.
exclude_words = {
    # personal pronouns
    'i', 'you', 'he', 'she', 'it', 'we', 'they',
    'me', 'him', 'her', 'us', 'them',
    # possessives
    'my', 'your', 'his', 'its', 'our', 'their',
    'mine', 'yours', 'hers', 'ours', 'theirs',
    # names from the story
    'alex', 'mia', 'sam',
    'echo', 'ridge',
    'guardian', 'of', 'the', 'glen',
}
def get_wordnet_pos(treebank_tag):
    """Translate a Penn Treebank tag into the matching WordNet POS constant.

    Only the first letter of the tag is significant: ``J`` maps to adjective,
    ``V`` to verb, ``N`` to noun and ``R`` to adverb. Any other tag (including
    an empty string) yields ``None``.
    """
    first_letter_to_pos = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    return first_letter_to_pos.get(treebank_tag[:1])
def process_text(text):
    """Tokenize *text* and return its word frequencies and POS tags.

    Tokens are POS-tagged as one sequence *before* filtering, so the tagger
    still sees the surrounding words (stop words, punctuation, original
    casing). Tagging only the already-filtered, lowercased words would strip
    that context away.

    Args:
        text: Raw input text.

    Returns:
        A ``(word_freq, pos_tags)`` tuple where ``word_freq`` is a ``Counter``
        of the kept lowercase words and ``pos_tags`` is a list of
        ``(lowercase word, tag)`` pairs for those same words, in text order.
        A token is kept when it is alphanumeric and is neither a stop word
        nor in ``exclude_words``.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))
    pos_tags = []
    for token, tag in tagged_tokens:
        lowered = token.lower()
        if not token.isalnum():
            continue
        if lowered in stop_words or lowered in exclude_words:
            continue
        pos_tags.append((lowered, tag))
    word_freq = Counter(word for word, _ in pos_tags)
    return word_freq, pos_tags
def generate_wordcloud(word_freq):
    """Render *word_freq* as a word-cloud image and return the saved file path.

    Args:
        word_freq: Mapping of word -> frequency.

    Returns:
        ``'wordcloud.png'`` (written to the current working directory), or
        ``None`` when ``word_freq`` is empty and there is nothing to draw.
    """
    if not word_freq:
        # WordCloud cannot lay out zero words; report "no image" instead of
        # letting the request fail.
        return None
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_freq)
    figure = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        figure.savefig('wordcloud.png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leave another figure open.
        plt.close(figure)
    return 'wordcloud.png'
def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary-table rows for the words found in the text.

    Only words that have an entry in ``word_data_examples``, are not in
    ``exclude_words`` and carry at least one tag known to the internal
    ``pos_map`` produce a row.

    Args:
        word_freq: Mapping of word -> frequency.
        pos_tags: Iterable of ``(word, Penn Treebank tag)`` pairs.

    Returns:
        A list of ``(word, pos_str, translation, example_sentence, synonyms)``
        tuples sorted by descending frequency. ``pos_str`` lists the word's
        part-of-speech labels in order of first appearance; for inflected
        verbs (past, -ing, past participle) a note naming the base form is
        appended, once per base form.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.',
        'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동형)', 'VBP': 'v.', 'VBZ': 'v.',
        'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.',
        'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.',
    }

    # Index the tags by word once instead of rescanning pos_tags per word.
    tags_by_word = {}
    for token, tag in pos_tags:
        tags_by_word.setdefault(token, []).append(tag)

    seen_verbs = set()  # Base forms that have already received a verb-form note
    word_data = []
    for word in word_freq:
        if word not in word_data_examples or word in exclude_words:
            continue
        word_tags = tags_by_word.get(word, [])
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # label string is the same on every run (a set would not guarantee it).
        labels = list(dict.fromkeys(pos_map[tag] for tag in word_tags if tag in pos_map))
        if not labels:
            continue  # No recognised part of speech for this word
        pos_str = ", ".join(labels)

        # For inflected verb forms, note which base form they come from.
        for tag in word_tags:
            if get_wordnet_pos(tag) != wordnet.VERB:
                continue
            lemmatized_word = lemmatizer.lemmatize(word, wordnet.VERB)
            if word == lemmatized_word or lemmatized_word in seen_verbs:
                continue
            if tag.startswith('VBD'):
                pos_str += f" (v. {lemmatized_word}의 과거형)"
            elif tag.startswith('VBG'):
                pos_str += f" (v. {lemmatized_word}의 ing형)"
            elif tag.startswith('VBN'):
                pos_str += f" (v. {lemmatized_word}의 과거분사형/수동형)"
            seen_verbs.add(lemmatized_word)

        example_sentence, synonyms, meaning_a, meaning_b = word_data_examples[word][:4]
        translation = f"{meaning_a}, {meaning_b}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    # Most frequent words first; the sort is stable, so ties keep text order.
    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)
    return word_data
def main(text):
    """Gradio callback: turn *text* into a word-cloud image and a vocabulary table.

    Args:
        text: The text entered by the user.

    Returns:
        A ``(wordcloud_image, word_data_table)`` tuple: the value returned by
        ``generate_wordcloud`` and an HTML ``<table>`` string with one row per
        vocabulary word.
    """
    word_freq, pos_tags = process_text(text)
    wordcloud_image = generate_wordcloud(word_freq)
    word_data = translate_and_get_pos(word_freq, pos_tags)
    # Create a DataFrame to display the word data in a table format
    df = pd.DataFrame(
        word_data,
        columns=["어휘 (Word)", "범주 (Category)", "뜻 (Meaning)", "예문 (Example)", "동의어 (Synonyms)"],
    )
    word_data_table = df.to_html(index=False, justify='center')
    return wordcloud_image, word_data_table
# Custom CSS for the Gradio interface. This is raw CSS (no <style> wrapper)
# because it is handed to Gradio through the `css=` argument below.
css = """
body {
    background-color: skyblue !important;
}
.gr-button {
    background-color: blue !important;
    border-color: blue !important;
}
table {
    width: 100%;
    border-collapse: collapse;
    text-align: center;
}
th, td {
    padding: 8px;
    border: 1px solid #ddd;
}
th {
    background-color: #f2f2f2;
}
"""

# Gradio interface
interface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=["image", "html"],
    title="Wordcloud Vocabulary Learning App",
    description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
    "<br><br><b>The full text:</b><br>"
    """<blockquote>In the small mountain village of Echo Ridge, adventure was a part of everyday life. Nestled among towering peaks, the village was said to be protected by the "Guardian of the Glen," a massive eagle that supposedly watched over the villagers from its perch high in the mountains. The legend inspired many adventurous tales among the villagers, especially the children.
Among these children was a bright-eyed eighth grader named Alex. Alex was known for his daring spirit and his love for exploring the rugged landscapes around Echo Ridge. He had a particular fascination with the old maps and tales of hidden treasures that had been lost in the mountains centuries ago.
One day, while exploring the local library, Alex stumbled upon an ancient map tucked inside a forgotten book on village lore. The map hinted at the location of a lost treasure, hidden deep within a cave known as Whispering Hollow. Excited by the prospect of a real adventure, Alex decided to seek out the treasure.
Knowing the journey would be risky, he enlisted the help of his best friends, Mia and Sam. Together, they prepared for the expedition, gathering supplies and studying the map extensively. They planned their route, took note of landmarks, and readied themselves for any challenges they might face.
Their journey began at dawn. They trekked through dense forests, crossed rushing streams, and climbed steep cliffs. Along the way, they encountered various wildlife and navigated through tricky terrains, their map guiding them every step of the way.
After hours of hiking, they finally reached Whispering Hollow. The cave was more magnificent than they had imagined, filled with intricate stalactites and echoes of dripping water. Using their flashlights, they ventured deeper into the cave, guided by the markings on the map.
As they reached the heart of the cave, they discovered an ancient chest hidden behind a fallen boulder. With hearts pounding, they moved the boulder and opened the chest. Inside, instead of gold or jewels, they found a collection of old artifacts: pottery, coins, and a beautifully carved statuette of an eagle — the Guardian of the Glen.
Realizing the historical significance of their find, they decided to donate the artifacts to the local museum. The village celebrated their discovery, and the children were hailed as heroes. Their adventure brought the community together, sparking a renewed interest in the history and legends of Echo Ridge. Alex, Mia, and Sam became local legends, known not only for their daring but also for their spirit of discovery and respect for heritage. They continued to explore the mountains, each adventure strengthening their friendship and deepening their connection to their village.
The legend of the Guardian of the Glen lived on, not just as a protector but as a symbol of adventure and discovery, inspiring future generations to explore the mysteries of Echo Ridge.<br><br><i>Copy and paste to try.</i></blockquote>""",
    # The stylesheet has to be attached to the interface itself: a gr.HTML
    # component created after launch(), outside any layout, is never rendered.
    css=css,
)

# Launch the interface (the custom CSS is already attached above)
interface.launch()
|