Spaces:

maximuspowers
/

text-aug-demo

Sleeping

App Files Files Community

maximuspowers committed on Sep 11

Commit

ba275d7

•

1 Parent(s): 66a944e

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -102

app.py CHANGED Viewed

@@ -1,112 +1,35 @@
 import gradio as gr
-import gensim
-print(gensim.__version__)
-import transformers
-import sacremoses # for back translation tokenizer
-import nlpaug.augmenter.char as nac
 import nlpaug.augmenter.word as naw
 import nlpaug.augmenter.sentence as nas
-import nlpaug.flow as nafc
-from nlpaug.util import Action
-from nlpaug.util.file.download import DownloadUtil
-DownloadUtil.download_word2vec(dest_dir = '.')
-# Possible values are ‘wiki-news-300d-1M’, ‘wiki-news-300d-1M-subword’, ‘crawl-300d-2M’ and ‘crawl-300d-2M-subword’
-DownloadUtil.download_fasttext(dest_dir = '.', model_name = 'crawl-300d-2M')
-# for synonym replacement
-DownloadUtil.download_glove(dest_dir = '.', model_name = 'glove.6B')
-# augmentations
-def augment_text(text, aug_type, model_type=None, model_path=None, aug_p=0.25, aug_max=3):
-    if aug_type == 'Word Embedding Substitution':
-        aug = naw.WordEmbsAug(
-            model_type=model_type,
-            model_path=model_path,
-            action="substitute",
-            aug_p=aug_p
-        )
-    elif aug_type == 'Contextual Insertion':
-        aug = naw.ContextualWordEmbsAug(
-            model_path='bert-base-uncased',
-            action="insert",
-            aug_p=aug_p
-        )
-    elif aug_type == 'Synonym Replacement':
-        aug = naw.SynonymAug(
-            aug_src="wordnet",
-            aug_max=aug_max
-        )
-    elif aug_type == 'Back Translation':
-        aug = naw.BackTranslationAug(
-            from_model_name='facebook/wmt19-en-de',
-            to_model_name='facebook/wmt19-de-en'
-        )
-    else:
-        return text
     augmented_text = aug.augment(text)
     return augmented_text
-with gr.Blocks() as iface:
-    text_input = gr.Textbox(label="Input Text")
-    aug_type_input = gr.Radio(
-        choices=['Word Embedding Substitution', 'Contextual Insertion', 'Synonym Replacement', 'Back Translation'],
-        label="Augmentation Type",
-        value='Word Embedding Substitution'
-    )
-    model_type_input = gr.Dropdown(
-        choices=['word2vec', 'fasttext', 'glove'],
-        label="Model Type (for Word Embedding Substitution)",
-        value='word2vec',
-        visible=True
-    )
-    model_path_input = gr.Textbox(
-        label="Model Path (for Word Embedding Substitution)",
-        value="GoogleNews-vectors-negative300.bin",
-        visible=True
-    )
-    aug_p_input = gr.Slider(
-        minimum=0, maximum=1, step=0.05, value=0.25,
-        label="Probability of Augmentation (for Embedding Substitution or Contextual Insertion)"
-    )
-    aug_max_input = gr.Slider(
-        minimum=1, maximum=10, step=1, value=3,
-        label="Max Number of Words to Change (for Synonym Replacement)",
-        visible=False
-    )
-    augmented_output = gr.Textbox(label="Augmented Text")
-    # update input block visibility based on aug type
-    def update_inputs(aug_type):
-        if aug_type == 'Word Embedding Substitution':
-            return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
-        elif aug_type == 'Contextual Insertion':
-            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
-        elif aug_type == 'Synonym Replacement':
-            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
-        elif aug_type == 'Back Translation':
-            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
-    # update inputs when aug type changes
-    aug_type_input.change(
-        update_inputs,
-        inputs=[aug_type_input],
-        outputs=[model_type_input, model_path_input, aug_max_input]
-    )
-    apply_button = gr.Button("Apply Augmentation")
-    apply_button.click(
-        augment_text,
-        inputs=[text_input, aug_type_input, model_type_input, model_path_input, aug_p_input, aug_max_input],
-        outputs=[augmented_output]
-    )
 iface.launch()

 import gradio as gr
 import nlpaug.augmenter.word as naw
+import nlpaug.augmenter.char as nac
 import nlpaug.augmenter.sentence as nas
+# Function for NLP augmentation
+def augment_text(text, method):
+    if method == "Synonym Replacement":
+        aug = naw.SynonymAug(aug_src="wordnet", aug_max=3)
+    elif method == "Word Embedding Substitution":
+        aug = naw.WordEmbsAug(model_type='word2vec', model_path="GoogleNews-vectors-negative300.bin", action="substitute")
+    elif method == "Contextual Word Insertion":
+        aug = naw.ContextualWordEmbsAug(model_path="bert-base-uncased", action="insert")
+    elif method == "Back Translation":
+        aug = naw.BackTranslationAug(from_model_name='facebook/wmt19-en-de', to_model_name='facebook/wmt19-de-en')
     augmented_text = aug.augment(text)
     return augmented_text
+# Gradio Interface
+def nlp_augmentor_interface(text, method):
+    augmented_text = augment_text(text, method)
+    return augmented_text
+iface = gr.Interface(
+    fn=nlp_augmentor_interface,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter sentence to augment here..."),
+        gr.Radio(["Synonym Replacement", "Word Embedding Substitution", "Contextual Word Insertion", "Back Translation"], label="Augmentation Method")
+    ],
+    outputs="text",
+    title="NLP Text Augmentation with Gradio"
+)
 iface.launch()