Spaces:
Running
Running
File size: 3,921 Bytes
090b1b7 e36bf2f 090b1b7 e36bf2f 090b1b7 5dad7c8 090b1b7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import gradio as gr
import gensim
print(gensim.__version__)
import transformers
import sacremoses # for back translation tokenizer
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
from nlpaug.util import Action
from nlpaug.util.file.download import DownloadUtil
import os  # used only by the existence checks in this setup step


def _fetch_if_missing(expected_file, fetch):
    """Call ``fetch()`` only when ``expected_file`` is not already on disk.

    The embedding archives are several gigabytes each, so re-downloading
    them on every start-up is wasteful when a previous run already left
    the extracted vectors in the working directory.
    """
    if not os.path.exists(expected_file):
        fetch()


# Pretrained word vectors used by the 'Word Embedding Substitution' mode.
# NOTE(review): the fastText and GloVe file names below are the names the
# archives are expected to extract to -- confirm against the downloaded
# files. If a name does not match, the only effect is that the download
# runs on every start, exactly as it did before this guard existed.
_fetch_if_missing(
    'GoogleNews-vectors-negative300.bin',
    lambda: DownloadUtil.download_word2vec(dest_dir='.'),
)
# Possible fastText model names are 'wiki-news-300d-1M',
# 'wiki-news-300d-1M-subword', 'crawl-300d-2M' and 'crawl-300d-2M-subword'.
_fetch_if_missing(
    'crawl-300d-2M.vec',
    lambda: DownloadUtil.download_fasttext(dest_dir='.', model_name='crawl-300d-2M'),
)
# GloVe vectors (selectable as model type 'glove'); synonym replacement
# itself uses WordNet and does not need these files.
_fetch_if_missing(
    'glove.6B.300d.txt',
    lambda: DownloadUtil.download_glove(dest_dir='.', model_name='glove.6B'),
)
# augmentations
def augment_text(text, aug_type, model_type=None, model_path=None, aug_p=0.25, aug_max=3):
    """Augment ``text`` with the nlpaug augmenter selected by ``aug_type``.

    Args:
        text: Input text to augment.
        aug_type: One of 'Word Embedding Substitution',
            'Contextual Insertion', 'Synonym Replacement' or
            'Back Translation'. Any other value leaves the text unchanged.
        model_type: Embedding family passed to ``WordEmbsAug``; only used
            for 'Word Embedding Substitution'.
        model_path: Path to the embedding file passed to ``WordEmbsAug``;
            only used for 'Word Embedding Substitution'.
        aug_p: Augmentation probability passed to the embedding
            substitution and contextual insertion augmenters.
        aug_max: Maximum number of words to change, passed to the synonym
            augmenter.

    Returns:
        The augmented text as a single string, or ``text`` itself when it
        is empty/blank or ``aug_type`` is not recognised.
    """
    # Nothing to augment: skip the (expensive) model construction entirely.
    if not text or not text.strip():
        return text

    if aug_type == 'Word Embedding Substitution':
        aug = naw.WordEmbsAug(
            model_type=model_type,
            model_path=model_path,
            action="substitute",
            aug_p=aug_p
        )
    elif aug_type == 'Contextual Insertion':
        aug = naw.ContextualWordEmbsAug(
            model_path='bert-base-uncased',
            action="insert",
            aug_p=aug_p
        )
    elif aug_type == 'Synonym Replacement':
        aug = naw.SynonymAug(
            aug_src="wordnet",
            aug_max=aug_max
        )
    elif aug_type == 'Back Translation':
        aug = naw.BackTranslationAug(
            from_model_name='facebook/wmt19-en-de',
            to_model_name='facebook/wmt19-de-en'
        )
    else:
        return text

    result = aug.augment(text)
    # augment() may hand back a list of strings (one per generated sample)
    # instead of a bare string; flatten it so the output textbox shows
    # plain text rather than a Python list repr.
    if isinstance(result, (list, tuple)):
        return "\n".join(str(item) for item in result)
    return result
# Embedding file to suggest for each model type, so the path box follows the
# dropdown instead of keeping a word2vec path for fasttext/glove.
# NOTE(review): the fasttext and glove names are the expected extracted file
# names for the 'crawl-300d-2M' and 'glove.6B' downloads -- confirm on disk.
_DEFAULT_MODEL_PATHS = {
    'word2vec': 'GoogleNews-vectors-negative300.bin',
    'fasttext': 'crawl-300d-2M.vec',
    'glove': 'glove.6B.300d.txt',
}

# Which option widgets apply to each augmentation type, in the order
# (model type, model path, augmentation probability, max words).
_CONTROL_VISIBILITY = {
    'Word Embedding Substitution': (True, True, True, False),
    'Contextual Insertion': (False, False, True, False),
    'Synonym Replacement': (False, False, False, True),
    'Back Translation': (False, False, False, False),
}

with gr.Blocks() as iface:
    text_input = gr.Textbox(label="Input Text")
    aug_type_input = gr.Radio(
        choices=['Word Embedding Substitution', 'Contextual Insertion', 'Synonym Replacement', 'Back Translation'],
        label="Augmentation Type",
        value='Word Embedding Substitution'
    )
    model_type_input = gr.Dropdown(
        choices=['word2vec', 'fasttext', 'glove'],
        label="Model Type (for Word Embedding Substitution)",
        value='word2vec',
        visible=True
    )
    model_path_input = gr.Textbox(
        label="Model Path (for Word Embedding Substitution)",
        value="GoogleNews-vectors-negative300.bin",
        visible=True
    )
    aug_p_input = gr.Slider(
        minimum=0, maximum=1, step=0.05, value=0.25,
        label="Probability of Augmentation (for Embedding Substitution or Contextual Insertion)"
    )
    aug_max_input = gr.Slider(
        minimum=1, maximum=10, step=1, value=3,
        label="Max Number of Words to Change (for Synonym Replacement)",
        visible=False
    )
    augmented_output = gr.Textbox(label="Augmented Text")

    def update_inputs(aug_type):
        """Return one visibility update per option widget for ``aug_type``.

        An unrecognised type hides every option widget, so the handler
        always returns as many updates as there are wired outputs.
        """
        flags = _CONTROL_VISIBILITY.get(aug_type, (False, False, False, False))
        return tuple(gr.update(visible=flag) for flag in flags)

    # Show only the options relevant to the selected augmentation type.
    aug_type_input.change(
        update_inputs,
        inputs=[aug_type_input],
        outputs=[model_type_input, model_path_input, aug_p_input, aug_max_input]
    )

    def default_model_path(model_type):
        """Return the suggested embedding file for ``model_type``."""
        return _DEFAULT_MODEL_PATHS.get(model_type, "")

    # Keep the model path in step with the chosen embedding family; the
    # user can still overwrite the suggestion by typing a custom path.
    model_type_input.change(
        default_model_path,
        inputs=[model_type_input],
        outputs=[model_path_input]
    )

    apply_button = gr.Button("Apply Augmentation")
    # Input order matches augment_text's positional parameters.
    apply_button.click(
        augment_text,
        inputs=[text_input, aug_type_input, model_type_input, model_path_input, aug_p_input, aug_max_input],
        outputs=[augmented_output]
    )

iface.launch()
|