File size: 2,945 Bytes
bc46ee1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
from transformers import AutoTokenizer

# Load the pretrained tokenizer identified by 'openai-community/gpt2'.
# NOTE(review): the variable name (and the "Token count (BERT tokens)" option
# in the UI below) says BERT, but the checkpoint requested here is GPT-2 —
# confirm which tokenizer is intended. Nothing in the visible code reads this
# variable.
bert_tokenizer = AutoTokenizer.from_pretrained('openai-community/gpt2')

def display_next_step_tokens(sentence, step):
    """Return visibility updates for two components (a textbox and a radio).

    Both updates carry the same ``visible`` flag: the result of comparing the
    module-level ``split_selection`` with ``LABEL_RECURSIVE``.

    NOTE(review): ``sentence`` and ``step`` are not used by the body, and
    ``split_selection`` / ``LABEL_RECURSIVE`` are resolved from module scope
    rather than from the parameters. This looks like a body carried over from
    a splitter-selection callback that still needs its real implementation —
    confirm the intended behaviour with the author.

    Args:
        sentence: Currently unused.
        step: Currently unused.

    Returns:
        A 2-tuple of Gradio update objects, each setting only ``visible``.
    """
    # Evaluate the condition once; both outputs share the same flag.
    show_recursive_options = split_selection == LABEL_RECURSIVE
    # Use the component-agnostic gr.update(): the per-class helpers
    # (gr.Textbox.update / gr.Radio.update) were removed in Gradio 4, which
    # this file targets (it uses gr.on), so calling them raises AttributeError.
    return (
        gr.update(visible=show_recursive_options),
        gr.update(visible=show_recursive_options),
    )

    
# Build the Gradio interface: a prompt box, two rows of chunking controls, a
# highlighted-text output, and the event wiring between them.
# NOTE(review): LABEL_TEXTSPLITTER, LABEL_RECURSIVE, change_split_selection,
# change_preset_separators and chunk are not defined in the visible part of
# this file; they must be in scope before this block executes.
with gr.Blocks(theme=gr.themes.Soft(text_size='lg', font=["monospace"], primary_hue=gr.themes.colors.green)) as demo:
    text = gr.Textbox(label="Your prompt to start decoding", value="Ok, I")

    # Row 1: which splitter to use, plus the separator controls.
    with gr.Row():
        split_selection = gr.Dropdown(
            choices=[
                LABEL_TEXTSPLITTER,
                LABEL_RECURSIVE,
            ],
            value=LABEL_RECURSIVE,
            label="Method to split chunks 🍞",
        )
        separators_selection = gr.Textbox(
            elem_id="textbox_id",
            value=["\n\n", "\n", " ", ""],
            info="Separators used in RecursiveCharacterTextSplitter",
            show_label=False, # or set label to an empty string if you want to keep its space
            visible=True,
        )
        separator_preset_selection = gr.Radio(
            ['Default', 'Python', 'Markdown'],
            label="Choose a preset",
            info="This will apply a specific set of separators to RecursiveCharacterTextSplitter.",
            visible=True,
        )

    # Row 2: length unit, chunk length and overlap.
    with gr.Row():
        length_unit_selection = gr.Dropdown(
            choices=[
                "Character count",
                "Token count (BERT tokens)",
            ],
            value="Character count",
            label="Length function",
            info="How should we measure our chunk lengths?",
        )
        # The label's emoji was stored as mojibake ("πŸ“"); restored to the
        # ruler emoji that the mis-decoded bytes most plausibly encoded.
        slider_count = gr.Slider(
            50, 500, value=200, step=1, label="Chunk length 📏", info="In the chosen unit."
        )
        chunk_overlap = gr.Slider(
            0, 50, value=10, step=1, label="Overlap between chunks", info="In the chosen unit."
        )

    out = gr.HighlightedText(
        label="Output",
        show_legend=True,
        show_label=False,
        color_map={'Overlap': '#DADADA'}
    )

    # Changing the splitter updates the two separator controls.
    split_selection.change(
        fn=change_split_selection,
        inputs=split_selection,
        outputs=[separators_selection, separator_preset_selection],
    )
    # Picking a preset rewrites the separators textbox.
    separator_preset_selection.change(
        fn=change_preset_separators,
        inputs=separator_preset_selection,
        outputs=separators_selection,
    )

    # The same ordered inputs feed `chunk` both on control changes and on the
    # initial page load, so they are declared once.
    chunk_inputs = [
        text,
        slider_count,
        split_selection,
        separators_selection,
        length_unit_selection,
        chunk_overlap,
    ]

    # Re-run `chunk` whenever any of the listed controls changes.
    gr.on(
        triggers=[
            text.change,
            length_unit_selection.change,
            separators_selection.change,
            split_selection.change,
            slider_count.change,
            chunk_overlap.change,
        ],
        fn=chunk,
        inputs=chunk_inputs,
        outputs=out,
    )
    # Run `chunk` once when the page loads so the output is populated.
    demo.load(chunk, inputs=chunk_inputs, outputs=out)
    
# Launch the Gradio app for the `demo` Blocks defined above (runs at import
# time, since there is no `__main__` guard).
demo.launch()