File size: 13,741 Bytes
ea5d289
 
f032594
 
 
 
ea5d289
 
f032594
ea5d289
f032594
 
 
 
 
ea5d289
f032594
 
ea5d289
f032594
ea5d289
f032594
ea5d289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f032594
 
 
 
ea5d289
f032594
 
 
 
ea5d289
 
 
 
 
 
 
 
 
 
 
 
f032594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea5d289
 
 
 
f032594
 
ea5d289
f032594
 
ea5d289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f032594
 
 
 
 
ea5d289
 
 
 
 
 
f032594
fd730e4
a7b7f1a
 
fd730e4
 
 
a7b7f1a
 
 
 
ea5d289
 
 
 
 
 
 
 
 
 
 
 
a7b7f1a
 
 
fd730e4
f032594
 
 
ea5d289
a3a4f31
f4da2c5
943fd65
 
f4da2c5
 
 
 
 
 
 
d545206
f032594
 
ef814ac
a7b7f1a
ee75c88
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212


import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from collections import Counter
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
import gradio as gr
import pandas as pd

# Download the NLTK data used below: tokenizer models, POS-tagger models,
# the stop-word list and WordNet (for lemmatization).
# Newer NLTK releases look up 'punkt_tab' and 'averaged_perceptron_tagger_eng'
# instead of the legacy 'punkt' / 'averaged_perceptron_tagger' packages, so
# both generations are requested; nltk.download() only reports (does not
# raise) when an id is unavailable.
for _resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'stopwords',
    'wordnet',
):
    nltk.download(_resource)

# English stop words, held in a set for O(1) membership tests while filtering.
stop_words = set(stopwords.words('english'))
# Shared lemmatizer used to reduce inflected verbs to their base form.
lemmatizer = WordNetLemmatizer()

# Vocabulary reference table, keyed by the lowercase word as it appears in the
# sample text. Each value is a 4-tuple:
#   (example sentence, comma-separated synonyms, Korean meaning, alternative Korean meaning)
# The Korean strings are stored as proper UTF-8 Hangul; earlier revisions held
# mis-decoded (mojibake) text here.
word_data_examples = {
    "village": ("The village was quiet at night.", "hamlet, community", "마을", "시골"),
    "adventure": ("They went on an exciting adventure in the forest.", "expedition, quest", "모험", "여행"),
    "map": ("We used a map to find the hidden treasure.", "chart, atlas", "지도", "약도"),
    "cave": ("They explored a dark cave in the mountains.", "cavern, grotto", "동굴", "굴"),
    "among": ("She found her book among the pile of papers.", "amidst, between", "가운데", "사이에"),
    "mountains": ("The mountains were covered with snow in winter.", "peaks, ranges", "산", "산맥"),
    "children": ("The children played games in the park.", "kids, youngsters", "아이들", "어린이"),
    "known": ("He was known for his kindness and bravery.", "recognized, famous", "알려진", "유명한"),
    "hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret", "숨겨진", "비밀의"),
    "local": ("The local market was full of fresh produce.", "regional, native", "지역의", "현지의"),
    "discovery": ("The discovery of the old map excited everyone.", "finding, revelation", "발견", "탐구"),
    "eagle": ("An eagle soared high above the valley.", "raptor, bird of prey", "독수리", "맹금"),
    "villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk", "마을 사람들", "주민들"),
    "legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore", "전설", "신화"),
    "tales": ("Grandma told us tales of her childhood.", "stories, narratives", "이야기", "동화"),
    "daring": ("His daring escape from the cave was legendary.", "bold, audacious", "대담한", "용감한"),
    "spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence", "정신", "혼"),
    "exploring": ("They spent the summer exploring the forest.", "investigating, discovering", "탐험하다", "탐구하다"),
    "old": ("The old castle was full of secrets.", "ancient, aged", "오래된", "낡은"),
    "lost": ("He felt lost without his best friend.", "missing, misplaced", "잃어버린", "길을 잃은"),
    "ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique", "고대의", "옛날의"),
    "inside": ("Inside the box was a beautiful necklace.", "within, interior", "안쪽", "내부"),
    "treasure": ("They dreamed of finding hidden treasure.", "riches, valuables", "보물", "귀중품"),
    "whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking", "속삭이는", "조용히 말하는"),
    "hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void", "빈", "구멍 난"),
    "decided": ("She decided to take the long way home.", "determined, resolved", "결정하다", "결심하다"),
    "journey": ("Their journey took them across the country.", "trip, voyage", "여행", "여정"),
    "together": ("They worked together to solve the mystery.", "jointly, collectively", "함께", "같이"),
    "way": ("She found a new way to solve the puzzle.", "method, manner", "방법", "방식"),
    "reached": ("They finally reached the top of the hill.", "arrived, attained", "도달하다", "도착하다"),
    "chest": ("The chest was filled with gold coins.", "trunk, box", "상자", "가슴"),
    "boulder": ("A large boulder blocked the path.", "rock, stone", "바위", "돌"),
    "artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities", "유물", "고대 유물"),
    "legends": ("The legends spoke of a hidden kingdom.", "myths, sagas", "전설", "신화"),
    "explore": ("They wanted to explore the old mansion.", "investigate, examine", "탐험하다", "조사하다"),
    "secret": ("She kept the secret hidden from everyone.", "confidential, hidden", "비밀", "숨겨진"),
    "small": ("The small kitten was very playful.", "tiny, little", "작은", "소형"),
    "mountain": ("The mountain was covered in thick forests.", "peak, hill", "산", "산맥"),
    "part": ("Each part of the puzzle was important.", "piece, segment", "부분", "조각"),
    "everyday": ("He wore his everyday clothes to the party.", "daily, routine", "일상적인", "매일의"),
    "life": ("Life in the village was peaceful.", "existence, being", "삶", "생명"),
    "nestled": ("The cabin was nestled in the woods.", "tucked, situated", "자리 잡다", "위치하다"),
    "towering": ("The towering trees made the forest dark and cool.", "lofty, soaring", "우뚝 솟은", "높은"),
    "peaks": ("The mountain peaks were covered in snow.", "summits, crests", "산봉우리", "정상"),
    "said": ("He said he would be back soon.", "stated, remarked", "말하다", "언급하다"),
    "protected": ("The ancient ruins were protected by law.", "guarded, sheltered", "보호된", "지켜진"),
    "massive": ("The massive ship docked at the port.", "enormous, huge", "거대한", "엄청난"),
    "supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly", "아마", "추정상"),
    "watched": ("They watched the movie together.", "observed, viewed", "보다", "관찰하다"),
    "perch": ("The bird found a perch on the windowsill.", "roost, rest", "횃대", "앉다")
}

# Lowercase tokens that never appear in the word cloud or the word list:
# personal/possessive pronouns plus the proper names used in the sample story.
exclude_words = {
    # personal pronouns
    'i', 'you', 'he', 'she', 'it', 'we', 'they',
    'me', 'him', 'her', 'us', 'them',
    # possessives
    'my', 'your', 'his', 'its', 'our', 'their',
    'mine', 'yours', 'hers', 'ours', 'theirs',
    # names from the sample story
    'alex', 'mia', 'sam', 'echo', 'ridge',
    'guardian', 'of', 'the', 'glen',
}

def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the corresponding WordNet POS constant.

    Tags starting with 'J', 'V', 'N' or 'R' map to ``wordnet.ADJ``,
    ``wordnet.VERB``, ``wordnet.NOUN`` and ``wordnet.ADV`` respectively.
    Any other tag yields ``None``.
    """
    # (tag prefix, name of the wordnet attribute); the attribute is resolved
    # only for the prefix that actually matches.
    prefix_to_attr = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attr_name in prefix_to_attr:
        if treebank_tag.startswith(prefix):
            return getattr(wordnet, attr_name)
    return None

def process_text(text):
    """Tokenize *text* and return word frequencies plus POS tags.

    Returns:
        A ``(word_freq, pos_tags)`` pair. ``word_freq`` is a ``Counter`` of the
        kept lowercase words; ``pos_tags`` is a list of ``(lowercase word, tag)``
        tuples, one per kept token, in text order.

    A token is kept when it is alphanumeric and its lowercase form is in
    neither ``stop_words`` nor ``exclude_words``.
    """
    # Tag the complete, original-case token stream first: the tagger relies on
    # neighbouring words (including stop words), so tagging an already
    # filtered list gives it no sentence context to work with.
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))

    pos_tags = []
    for token, tag in tagged_tokens:
        lowered = token.lower()
        if token.isalnum() and lowered not in stop_words and lowered not in exclude_words:
            pos_tags.append((lowered, tag))

    word_freq = Counter(word for word, _ in pos_tags)
    return word_freq, pos_tags

def generate_wordcloud(word_freq):
    """Render *word_freq* as a word cloud image and return the image path.

    Args:
        word_freq: Mapping of word -> frequency passed to
            ``WordCloud.generate_from_frequencies``.

    Returns:
        The path of the written PNG file, ``'wordcloud.png'``.
    """
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_freq)

    # Draw on an explicit figure/axes rather than pyplot's implicit "current
    # figure", and always close the figure afterwards: pyplot keeps every
    # figure alive until closed, so one unclosed figure per request would
    # accumulate in a long-running server.
    fig = plt.figure(figsize=(10, 5))
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        fig.savefig('wordcloud.png')
    finally:
        plt.close(fig)
    return 'wordcloud.png'

def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary rows for every word that has reference data.

    Args:
        word_freq: Mapping of word -> frequency (as returned by ``process_text``).
        pos_tags: Sequence of ``(word, Penn Treebank tag)`` pairs.

    Returns:
        A list of ``(word, pos_str, translation, example_sentence, synonyms)``
        tuples sorted by descending frequency. Words missing from
        ``word_data_examples``, listed in ``exclude_words``, or without any tag
        known to the label table are skipped.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.', 'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동태)', 'VBP': 'v.', 'VBZ': 'v.', 'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.', 'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.'
    }
    # Korean description of each inflected verb form that gets a
    # "(v. <lemma>의 ...)" note appended to the POS label.
    verb_form_notes = {
        'VBD': '과거형',
        'VBG': 'ing형',
        'VBN': '과거분사형/수동태',
    }

    # Group the tags per word once instead of rescanning pos_tags for every word.
    tags_by_word = {}
    for token, tag in pos_tags:
        tags_by_word.setdefault(token, []).append(tag)

    seen_verbs = set()  # Lemmas already handled; shared across all words
    word_data = []
    for word in word_freq:
        entry = word_data_examples.get(word)
        if entry is None or word in exclude_words:
            continue  # No reference data, or explicitly excluded

        word_tags = tags_by_word.get(word, [])
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # joined label is deterministic (iterating a set of str is not).
        labels = list(dict.fromkeys(pos_map[tag] for tag in word_tags if tag in pos_map))
        if not labels:
            continue  # No tag we know how to label
        pos_str = ", ".join(labels)

        # For inflected verbs, note the base form once per lemma.
        for tag in word_tags:
            if get_wordnet_pos(tag) != wordnet.VERB:
                continue
            lemmatized_word = lemmatizer.lemmatize(word, wordnet.VERB)
            if lemmatized_word == word or lemmatized_word in seen_verbs:
                continue
            note = verb_form_notes.get(tag)
            if note is not None:
                pos_str += f" (v. {lemmatized_word}의 {note})"
            seen_verbs.add(lemmatized_word)

        example_sentence, synonyms = entry[:2]
        translation = f"{entry[2]}, {entry[3]}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    # Sort the word data by frequency (stable: ties keep first-seen order)
    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)

    return word_data

def main(text):
    """Gradio callback: build the word cloud and the vocabulary table for *text*.

    Returns:
        A ``(image_path, html_table)`` pair. ``image_path`` is ``None`` when
        *text* contains no usable words, because a word cloud cannot be
        generated from an empty frequency table; the table is then empty.
    """
    word_freq, pos_tags = process_text(text)
    # WordCloud raises on an empty frequency mapping, so skip the image then.
    wordcloud_image = generate_wordcloud(word_freq) if word_freq else None
    word_data = translate_and_get_pos(word_freq, pos_tags)

    # Create a DataFrame to display the word data in a table format
    df = pd.DataFrame(
        word_data,
        columns=["어휘 (Word)", "범주 (Category)", "뜻 (Meaning)", "예문 (Example)", "동의어 (Synonyms)"],
    )
    word_data_table = df.to_html(index=False, justify='center')

    return wordcloud_image, word_data_table

# Custom CSS for the Gradio interface.
# Passed to gr.Interface through its ``css`` argument, which takes plain CSS
# rules, so the string is not wrapped in <style> tags.
css = """
body {
    background-color: skyblue !important;
}
.gr-button {
    background-color: blue !important;
    border-color: blue !important;
}
table {
    width: 100%;
    border-collapse: collapse;
    text-align: center;
}
th, td {
    padding: 8px;
    border: 1px solid #ddd;
}
th {
    background-color: #f2f2f2;
}
"""

# Gradio interface: one text input, returning the word-cloud image and the
# vocabulary table (HTML). The description embeds the sample story so users
# can copy and paste it into the input box.
interface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=["image", "html"],
    title="Wordcloud Vocabulary Learning App",
    description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
     "<br><br><b>The full text:</b><br>"
     """<blockquote>In the small mountain village of Echo Ridge, adventure was a part of everyday life. Nestled among towering peaks, the village was said to be protected by the "Guardian of the Glen," a massive eagle that supposedly watched over the villagers from its perch high in the mountains. The legend inspired many adventurous tales among the villagers, especially the children.
Among these children was a bright-eyed eighth grader named Alex. Alex was known for his daring spirit and his love for exploring the rugged landscapes around Echo Ridge. He had a particular fascination with the old maps and tales of hidden treasures that had been lost in the mountains centuries ago.
One day, while exploring the local library, Alex stumbled upon an ancient map tucked inside a forgotten book on village lore. The map hinted at the location of a lost treasure, hidden deep within a cave known as Whispering Hollow. Excited by the prospect of a real adventure, Alex decided to seek out the treasure.
Knowing the journey would be risky, he enlisted the help of his best friends, Mia and Sam. Together, they prepared for the expedition, gathering supplies and studying the map extensively. They planned their route, took note of landmarks, and readied themselves for any challenges they might face.
Their journey began at dawn. They trekked through dense forests, crossed rushing streams, and climbed steep cliffs. Along the way, they encountered various wildlife and navigated through tricky terrains, their map guiding them every step of the way.
After hours of hiking, they finally reached Whispering Hollow. The cave was more magnificent than they had imagined, filled with intricate stalactites and echoes of dripping water. Using their flashlights, they ventured deeper into the cave, guided by the markings on the map.
As they reached the heart of the cave, they discovered an ancient chest hidden behind a fallen boulder. With hearts pounding, they moved the boulder and opened the chest. Inside, instead of gold or jewels, they found a collection of old artifacts: pottery, coins, and a beautifully carved statuette of an eagle — the Guardian of the Glen.
Realizing the historical significance of their find, they decided to donate the artifacts to the local museum. The village celebrated their discovery, and the children were hailed as heroes. Their adventure brought the community together, sparking a renewed interest in the history and legends of Echo Ridge. Alex, Mia, and Sam became local legends, known not only for their daring but also for their spirit of discovery and respect for heritage. They continued to explore the mountains, each adventure strengthening their friendship and deepening their connection to their village.
The legend of the Guardian of the Glen lived on, not just as a protector but as a symbol of adventure and discovery, inspiring future generations to explore the mysteries of Echo Ridge.<br><br><i>Copy and paste to try.</i></blockquote>""",
    css=css,
)

# Launch the interface. The custom CSS is attached through the ``css``
# argument above; a gr.HTML component created after launch() (and outside any
# layout context) is never rendered, so it cannot carry the styles.
interface.launch()