File size: 3,921 Bytes
090b1b7
e36bf2f
 
 
 
 
 
 
 
090b1b7
e36bf2f
 
 
 
090b1b7
5dad7c8
 
 
 
 
 
 
 
090b1b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
import gensim
print(gensim.__version__)

import transformers

import sacremoses # for back translation tokenizer

import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
from nlpaug.util import Action
from nlpaug.util.file.download import DownloadUtil

# Fetch the pretrained embedding archives into the working directory at startup.
_EMBEDDINGS_DIR = '.'

# word2vec vectors.
DownloadUtil.download_word2vec(dest_dir=_EMBEDDINGS_DIR)

# fastText vectors. Possible model_name values are 'wiki-news-300d-1M',
# 'wiki-news-300d-1M-subword', 'crawl-300d-2M' and 'crawl-300d-2M-subword'.
DownloadUtil.download_fasttext(dest_dir=_EMBEDDINGS_DIR, model_name='crawl-300d-2M')

# GloVe vectors (offered as the 'glove' model type in the UI below).
DownloadUtil.download_glove(dest_dir=_EMBEDDINGS_DIR, model_name='glove.6B')

# augmentations
def augment_text(text, aug_type, model_type=None, model_path=None, aug_p=0.25, aug_max=3):
    """Apply a single nlpaug word-level augmentation to ``text``.

    Args:
        text: The text to augment.
        aug_type: Which augmentation to run. Recognised values are
            'Word Embedding Substitution', 'Contextual Insertion',
            'Synonym Replacement' and 'Back Translation'; any other value
            returns ``text`` unchanged.
        model_type: Forwarded to ``naw.WordEmbsAug`` as ``model_type``
            (used only for 'Word Embedding Substitution').
        model_path: Forwarded to ``naw.WordEmbsAug`` as ``model_path``
            (used only for 'Word Embedding Substitution').
        aug_p: Forwarded as ``aug_p`` to the embedding-substitution and
            contextual-insertion augmenters.
        aug_max: Forwarded as ``aug_max`` to ``naw.SynonymAug``.

    Returns:
        The augmented text. When ``text`` is a string the result is a string
        as well, so it can be shown directly in a text widget. ``text`` is
        returned untouched when it is ``None``, blank, or ``aug_type`` is not
        recognised.
    """
    # Nothing to augment: return before constructing an augmenter, since the
    # constructors below load pretrained models.
    if text is None or (isinstance(text, str) and not text.strip()):
        return text

    if aug_type == 'Word Embedding Substitution':
        aug = naw.WordEmbsAug(
            model_type=model_type,
            model_path=model_path,
            action="substitute",
            aug_p=aug_p
        )
    elif aug_type == 'Contextual Insertion':
        aug = naw.ContextualWordEmbsAug(
            model_path='bert-base-uncased',
            action="insert",
            aug_p=aug_p
        )
    elif aug_type == 'Synonym Replacement':
        aug = naw.SynonymAug(
            aug_src="wordnet",
            aug_max=aug_max
        )
    elif aug_type == 'Back Translation':
        aug = naw.BackTranslationAug(
            from_model_name='facebook/wmt19-en-de',
            to_model_name='facebook/wmt19-de-en'
        )
    else:
        return text

    augmented_text = aug.augment(text)

    # NOTE(review): recent nlpaug releases appear to return a list from
    # augment() even for a single string input - confirm against the pinned
    # version. Unwrap it so a string in yields a string out; list inputs keep
    # whatever augment() returned.
    if isinstance(text, str) and isinstance(augmented_text, (list, tuple)):
        return "\n".join(str(item) for item in augmented_text)
    return augmented_text

# Embedding file to suggest for each model type offered in the dropdown.
# NOTE(review): the fasttext and glove file names are assumed from the
# archives requested at startup ('crawl-300d-2M', 'glove.6B') - confirm
# against the files actually extracted into the working directory.
_DEFAULT_MODEL_PATHS = {
    'word2vec': 'GoogleNews-vectors-negative300.bin',
    'fasttext': 'crawl-300d-2M.vec',
    'glove': 'glove.6B.300d.txt',
}

# Gradio UI: pick an augmentation, tune its parameters, and view the result.
with gr.Blocks() as iface:
    text_input = gr.Textbox(label="Input Text")
    aug_type_input = gr.Radio(
        choices=['Word Embedding Substitution', 'Contextual Insertion', 'Synonym Replacement', 'Back Translation'],
        label="Augmentation Type",
        value='Word Embedding Substitution'
    )

    model_type_input = gr.Dropdown(
        choices=['word2vec', 'fasttext', 'glove'],
        label="Model Type (for Word Embedding Substitution)",
        value='word2vec',
        visible=True
    )
    model_path_input = gr.Textbox(
        label="Model Path (for Word Embedding Substitution)",
        value=_DEFAULT_MODEL_PATHS['word2vec'],
        visible=True
    )
    aug_p_input = gr.Slider(
        minimum=0, maximum=1, step=0.05, value=0.25,
        label="Probability of Augmentation (for Embedding Substitution or Contextual Insertion)"
    )
    aug_max_input = gr.Slider(
        minimum=1, maximum=10, step=1, value=3,
        label="Max Number of Words to Change (for Synonym Replacement)",
        visible=False
    )

    augmented_output = gr.Textbox(label="Augmented Text")

    def update_inputs(aug_type):
        """Return visibility updates for (model type, model path, aug max).

        The two model inputs are shown only for 'Word Embedding Substitution'
        and the max-words slider only for 'Synonym Replacement'. Every other
        value hides all three, so the handler always returns one update per
        output component.
        """
        show_model_inputs = aug_type == 'Word Embedding Substitution'
        show_aug_max = aug_type == 'Synonym Replacement'
        return (
            gr.update(visible=show_model_inputs),
            gr.update(visible=show_model_inputs),
            gr.update(visible=show_aug_max),
        )

    def update_model_path(model_type):
        """Point the model path at the default file for the chosen model type.

        Without this the path textbox keeps the word2vec file name after the
        user switches the dropdown to fasttext or glove. Unknown model types
        leave the path as it is.
        """
        if model_type in _DEFAULT_MODEL_PATHS:
            return gr.update(value=_DEFAULT_MODEL_PATHS[model_type])
        return gr.update()

    # Refresh input visibility whenever the augmentation type changes.
    aug_type_input.change(
        update_inputs,
        inputs=[aug_type_input],
        outputs=[model_type_input, model_path_input, aug_max_input]
    )

    # Keep the model path in step with the selected embedding model type.
    model_type_input.change(
        update_model_path,
        inputs=[model_type_input],
        outputs=[model_path_input]
    )

    apply_button = gr.Button("Apply Augmentation")

    apply_button.click(
        augment_text,
        inputs=[text_input, aug_type_input, model_type_input, model_path_input, aug_p_input, aug_max_input],
        outputs=[augmented_output]
    )

iface.launch()