Spaces:
Sleeping
Sleeping
maximuspowers
committed on
Commit
•
ba275d7
1
Parent(s):
66a944e
Update app.py
Browse files
app.py
CHANGED
@@ -1,112 +1,35 @@
|
|
1 |
import gradio as gr
|
2 |
-
import gensim
|
3 |
-
print(gensim.__version__)
|
4 |
-
|
5 |
-
import transformers
|
6 |
-
|
7 |
-
import sacremoses # for back translation tokenizer
|
8 |
-
|
9 |
-
import nlpaug.augmenter.char as nac
|
10 |
import nlpaug.augmenter.word as naw
|
|
|
11 |
import nlpaug.augmenter.sentence as nas
|
12 |
-
import nlpaug.flow as nafc
|
13 |
-
from nlpaug.util import Action
|
14 |
-
from nlpaug.util.file.download import DownloadUtil
|
15 |
-
|
16 |
-
# Fetch the pretrained embedding files into the current working directory
# at import time, before any augmenter is constructed.
DownloadUtil.download_word2vec(dest_dir='.')

# Possible fastText model names are 'wiki-news-300d-1M',
# 'wiki-news-300d-1M-subword', 'crawl-300d-2M' and 'crawl-300d-2M-subword'.
DownloadUtil.download_fasttext(dest_dir='.', model_name='crawl-300d-2M')

# for synonym replacement
DownloadUtil.download_glove(dest_dir='.', model_name='glove.6B')
|
23 |
-
|
24 |
-
# augmentations
|
25 |
-
def _build_augmenter(aug_type, model_type, model_path, aug_p, aug_max):
    """Return the nlpaug augmenter for ``aug_type``, or ``None`` if unknown."""
    if aug_type == 'Word Embedding Substitution':
        return naw.WordEmbsAug(
            model_type=model_type,
            model_path=model_path,
            action="substitute",
            aug_p=aug_p,
        )
    if aug_type == 'Contextual Insertion':
        return naw.ContextualWordEmbsAug(
            model_path='bert-base-uncased',
            action="insert",
            aug_p=aug_p,
        )
    if aug_type == 'Synonym Replacement':
        return naw.SynonymAug(
            aug_src="wordnet",
            aug_max=aug_max,
        )
    if aug_type == 'Back Translation':
        return naw.BackTranslationAug(
            from_model_name='facebook/wmt19-en-de',
            to_model_name='facebook/wmt19-de-en',
        )
    return None


def augment_text(text, aug_type, model_type=None, model_path=None, aug_p=0.25, aug_max=3):
    """Augment ``text`` with the nlpaug augmenter selected by ``aug_type``.

    Args:
        text: Input handed to the augmenter's ``augment`` method.
        aug_type: One of 'Word Embedding Substitution', 'Contextual Insertion',
            'Synonym Replacement' or 'Back Translation'.
        model_type: Forwarded to ``WordEmbsAug`` (embedding substitution only).
        model_path: Forwarded to ``WordEmbsAug`` (embedding substitution only).
        aug_p: Forwarded to the embedding-substitution and contextual-insertion
            augmenters.
        aug_max: Forwarded to ``SynonymAug`` (synonym replacement only).

    Returns:
        Whatever ``augment`` returns for a recognised ``aug_type``; otherwise
        ``text`` unchanged.
    """
    augmenter = _build_augmenter(aug_type, model_type, model_path, aug_p, aug_max)
    if augmenter is None:
        # Unrecognised augmentation type: pass the input straight through.
        return text
    return augmenter.augment(text)
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
label="Augmentation Type",
|
60 |
-
value='Word Embedding Substitution'
|
61 |
-
)
|
62 |
-
|
63 |
-
model_type_input = gr.Dropdown(
|
64 |
-
choices=['word2vec', 'fasttext', 'glove'],
|
65 |
-
label="Model Type (for Word Embedding Substitution)",
|
66 |
-
value='word2vec',
|
67 |
-
visible=True
|
68 |
-
)
|
69 |
-
model_path_input = gr.Textbox(
|
70 |
-
label="Model Path (for Word Embedding Substitution)",
|
71 |
-
value="GoogleNews-vectors-negative300.bin",
|
72 |
-
visible=True
|
73 |
-
)
|
74 |
-
aug_p_input = gr.Slider(
|
75 |
-
minimum=0, maximum=1, step=0.05, value=0.25,
|
76 |
-
label="Probability of Augmentation (for Embedding Substitution or Contextual Insertion)"
|
77 |
-
)
|
78 |
-
aug_max_input = gr.Slider(
|
79 |
-
minimum=1, maximum=10, step=1, value=3,
|
80 |
-
label="Max Number of Words to Change (for Synonym Replacement)",
|
81 |
-
visible=False
|
82 |
-
)
|
83 |
-
|
84 |
-
augmented_output = gr.Textbox(label="Augmented Text")
|
85 |
-
|
86 |
-
# update input block visibility based on aug type
|
87 |
-
def update_inputs(aug_type):
    """Return three visibility updates for the selected augmentation type.

    The first two updates are visible only for 'Word Embedding Substitution';
    the third is visible only for 'Synonym Replacement'. All three are hidden
    for 'Contextual Insertion' and 'Back Translation'.

    NOTE(review): presumably the three updates target the model-type,
    model-path and max-words inputs, in that order -- the event wiring is not
    visible here, confirm against the ``.change(...)`` call.

    Returns:
        A 3-tuple of ``gr.update(visible=...)`` values, or ``None`` when
        ``aug_type`` is not one of the four known types.
    """
    known_types = (
        'Word Embedding Substitution',
        'Contextual Insertion',
        'Synonym Replacement',
        'Back Translation',
    )
    if aug_type not in known_types:
        return None

    show_embedding_inputs = aug_type == 'Word Embedding Substitution'
    show_max_words = aug_type == 'Synonym Replacement'
    return (
        gr.update(visible=show_embedding_inputs),
        gr.update(visible=show_embedding_inputs),
        gr.update(visible=show_max_words),
    )
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
apply_button.click(
|
107 |
-
augment_text,
|
108 |
-
inputs=[text_input, aug_type_input, model_type_input, model_path_input, aug_p_input, aug_max_input],
|
109 |
-
outputs=[augmented_output]
|
110 |
-
)
|
111 |
|
112 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import nlpaug.augmenter.word as naw
|
3 |
+
import nlpaug.augmenter.char as nac
|
4 |
import nlpaug.augmenter.sentence as nas
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Function for NLP augmentation
|
7 |
+
def augment_text(text, method):
    """Augment ``text`` with the nlpaug augmenter selected by ``method``.

    Args:
        text: Input handed to the augmenter's ``augment`` method.
        method: One of "Synonym Replacement", "Word Embedding Substitution",
            "Contextual Word Insertion" or "Back Translation".

    Returns:
        The result of ``augment`` for a recognised ``method``; otherwise
        ``text`` unchanged.
        NOTE(review): depending on the installed nlpaug version, ``augment``
        may return a list rather than a string -- confirm before relying on
        the output type.
    """
    if method == "Synonym Replacement":
        aug = naw.SynonymAug(aug_src="wordnet", aug_max=3)
    elif method == "Word Embedding Substitution":
        # NOTE(review): the model is loaded by relative path and nothing in
        # the visible code downloads it -- confirm the file is present.
        aug = naw.WordEmbsAug(
            model_type='word2vec',
            model_path="GoogleNews-vectors-negative300.bin",
            action="substitute",
        )
    elif method == "Contextual Word Insertion":
        aug = naw.ContextualWordEmbsAug(model_path="bert-base-uncased", action="insert")
    elif method == "Back Translation":
        aug = naw.BackTranslationAug(
            from_model_name='facebook/wmt19-en-de',
            to_model_name='facebook/wmt19-de-en',
        )
    else:
        # Unrecognised or missing method (e.g. None): without this branch
        # ``aug`` is never bound and the call below raises UnboundLocalError.
        # Pass the input through unchanged instead.
        return text

    return aug.augment(text)
|
19 |
|
20 |
+
# Gradio Interface
|
21 |
+
def nlp_augmentor_interface(text, method):
    """Gradio callback: run ``augment_text`` on ``text`` using ``method``.

    Returns whatever ``augment_text`` returns.
    """
    return augment_text(text, method)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
# Input widgets: a free-text box and a radio selector for the augmentation method.
_sentence_box = gr.Textbox(lines=2, placeholder="Enter sentence to augment here...")
_method_radio = gr.Radio(
    ["Synonym Replacement", "Word Embedding Substitution", "Contextual Word Insertion", "Back Translation"],
    label="Augmentation Method",
)

# Wire the widgets to the augmentation callback; the result is shown as text.
iface = gr.Interface(
    fn=nlp_augmentor_interface,
    inputs=[_sentence_box, _method_radio],
    outputs="text",
    title="NLP Text Augmentation with Gradio",
)

iface.launch()
|