import gradio as gr
import gensim
print(gensim.__version__)
import transformers
import sacremoses  # for back translation tokenizer
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
from nlpaug.util import Action
from nlpaug.util.file.download import DownloadUtil

# Fetch the pretrained embedding files into the working directory so the
# "Word Embedding Substitution" option can load them by path.
# NOTE(review): these calls run on every start-up; confirm whether DownloadUtil
# skips files that are already present before relying on fast restarts.
DownloadUtil.download_word2vec(dest_dir='.')
# Possible fastText values are 'wiki-news-300d-1M', 'wiki-news-300d-1M-subword',
# 'crawl-300d-2M' and 'crawl-300d-2M-subword'.
DownloadUtil.download_fasttext(dest_dir='.', model_name='crawl-300d-2M')
# GloVe vectors (selectable as the 'glove' model type in the UI below).
DownloadUtil.download_glove(dest_dir='.', model_name='glove.6B')


def _as_text(result):
    """Collapse an augmenter result into one string for display in a Textbox."""
    # Depending on the nlpaug version, augment() yields either a string or a
    # list of strings; the output Textbox needs a plain string.
    if isinstance(result, (list, tuple)):
        return "\n".join(str(item) for item in result)
    return result


# augmentations
def augment_text(text, aug_type, model_type=None, model_path=None, aug_p=0.25, aug_max=3):
    """Apply the selected nlpaug augmentation to ``text``.

    Args:
        text: Input text to augment.
        aug_type: One of 'Word Embedding Substitution', 'Contextual Insertion',
            'Synonym Replacement' or 'Back Translation'. Any other value
            leaves the text unchanged.
        model_type: Embedding family passed to ``WordEmbsAug``
            (used only for 'Word Embedding Substitution').
        model_path: Path of the embedding file passed to ``WordEmbsAug``
            (used only for 'Word Embedding Substitution').
        aug_p: Augmentation probability, used by 'Word Embedding Substitution'
            and 'Contextual Insertion'.
        aug_max: Maximum number of words to change, used by
            'Synonym Replacement'.

    Returns:
        The augmented text as a single string, or ``text`` itself when it is
        empty/blank or ``aug_type`` is not recognised.
    """
    # Nothing to augment: return early instead of building (and loading the
    # model behind) an augmenter for empty input.
    if not text or not text.strip():
        return text

    if aug_type == 'Word Embedding Substitution':
        aug = naw.WordEmbsAug(
            model_type=model_type,
            model_path=model_path,
            action="substitute",
            aug_p=aug_p,
        )
    elif aug_type == 'Contextual Insertion':
        aug = naw.ContextualWordEmbsAug(
            model_path='bert-base-uncased',
            action="insert",
            aug_p=aug_p,
        )
    elif aug_type == 'Synonym Replacement':
        aug = naw.SynonymAug(
            aug_src="wordnet",
            # The value comes from a UI slider and may arrive as a float.
            aug_max=int(aug_max),
        )
    elif aug_type == 'Back Translation':
        aug = naw.BackTranslationAug(
            from_model_name='facebook/wmt19-en-de',
            to_model_name='facebook/wmt19-de-en',
        )
    else:
        return text

    return _as_text(aug.augment(text))


with gr.Blocks() as iface:
    text_input = gr.Textbox(label="Input Text")
    aug_type_input = gr.Radio(
        choices=['Word Embedding Substitution', 'Contextual Insertion', 'Synonym Replacement', 'Back Translation'],
        label="Augmentation Type",
        value='Word Embedding Substitution',
    )
    model_type_input = gr.Dropdown(
        choices=['word2vec', 'fasttext', 'glove'],
        label="Model Type (for Word Embedding Substitution)",
        value='word2vec',
        visible=True,
    )
    model_path_input = gr.Textbox(
        label="Model Path (for Word Embedding Substitution)",
        value="GoogleNews-vectors-negative300.bin",
        visible=True,
    )
    aug_p_input = gr.Slider(
        minimum=0,
        maximum=1,
        step=0.05,
        value=0.25,
        label="Probability of Augmentation (for Embedding Substitution or Contextual Insertion)",
    )
    aug_max_input = gr.Slider(
        minimum=1,
        maximum=10,
        step=1,
        value=3,
        label="Max Number of Words to Change (for Synonym Replacement)",
        visible=False,
    )
    augmented_output = gr.Textbox(label="Augmented Text")

    # update input block visibility based on aug type
    def update_inputs(aug_type):
        """Return visibility updates for (model type, model path, aug max)."""
        # The embedding controls only apply to word-embedding substitution and
        # the max-words slider only to synonym replacement; every other type
        # (including an unrecognised one) hides all three.
        show_embedding = aug_type == 'Word Embedding Substitution'
        show_aug_max = aug_type == 'Synonym Replacement'
        return (
            gr.update(visible=show_embedding),
            gr.update(visible=show_embedding),
            gr.update(visible=show_aug_max),
        )

    # update inputs when aug type changes
    aug_type_input.change(
        update_inputs,
        inputs=[aug_type_input],
        outputs=[model_type_input, model_path_input, aug_max_input],
    )

    apply_button = gr.Button("Apply Augmentation")
    apply_button.click(
        augment_text,
        inputs=[text_input, aug_type_input, model_type_input, model_path_input, aug_p_input, aug_max_input],
        outputs=[augmented_output],
    )

iface.launch()