Spaces:

CarperAI
/

pilev2_pipeline

Runtime error

App Files Files Community

ncoop57 committed on Nov 26, 2022

Commit

4c20fbb

1 Parent(s): 82935d8

Have initial setup of layout and fake data

Browse files

Files changed (2) hide show

app.py +231 -20
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -1,27 +1,238 @@
 import gradio as gr
-def sentence_builder(quantity, animal, place, activity_list, morning):
-    return f"""The {quantity} {animal}s went to the {place} where they {" and ".join(activity_list)} until the {"morning" if morning else "night"}"""
-demo = gr.Interface(
-    sentence_builder,
-    [
-        gr.Slider(2, 20, value=4),
-        gr.Dropdown(["cat", "dog", "bird"]),
-        gr.Radio(["park", "zoo", "road"]),
-        gr.CheckboxGroup(["ran", "swam", "ate", "slept"]),
-        gr.Checkbox(label="Is it the morning?"),
-    ],
-    "text",
-    examples=[
-        [2, "cat", "park", ["ran", "swam"], True],
-        [4, "dog", "zoo", ["ate", "swam"], False],
-        [10, "bird", "road", ["ran"], False],
-        [8, "cat", "zoo", ["ate"], True],
-    ],
-)
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+# ai4code_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/AI4Code")
+# amps_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/AMPS")
+# apache_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/ASFPublicMail")
+# books3_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/Books3")
+# cp_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/CPDataset")
+# dmmath_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/DMMath")
+# discourse_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/Discourse")
+# wiki_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/Enwiki")
+# euro_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/EuroParliamentProceedings")
+# freelaw_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/FreeLaw_Options")
+# ghdiffs_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/GitHubDiff")
+# ghissues_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/GitHubIssues")
+# gutenberg_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/Gutenberg")
+# leet_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/LeetCode")
+# pileoflaw_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/PileOfLaw")
+# pubmed_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/PubMed")
+# s2orc_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/S2ORC")
+# se_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/StackExchange")
+# usenet_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/USENET")
+# uspto_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/USPTO")
+# ubuntuirc_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/UbuntuIRC")
+# arxiv_ds = load_dataset("CarperAI/pile-v2-small", data_dir="data/arXiv")
+# Dataset names offered in the UI. They match the ``data/<name>`` directories
+# used by the commented-out ``load_dataset`` calls for CarperAI/pile-v2-small.
+DATASET_NAMES = (
+    "AI4Code",
+    "AMPS",
+    "ASFPublicMail",
+    "Books3",
+    "CPDataset",
+    "DMMath",
+    "Discourse",
+    "Enwiki",
+    "EuroParliamentProceedings",
+    "FreeLaw_Options",
+    "GitHubDiff",
+    "GitHubIssues",
+    "Gutenberg",
+    "LeetCode",
+    "PileOfLaw",
+    "PubMed",
+    "S2ORC",
+    "StackExchange",
+    "USENET",
+    "USPTO",
+    "UbuntuIRC",
+    "arXiv",
+)
+# Number of fake samples generated per statistic.
+FAKE_SAMPLE_COUNT = 1000
+def _fake_stats(n_samples=FAKE_SAMPLE_COUNT):
+    """Return placeholder statistics for one dataset.
+    The three ratio arrays are standard-normal draws and ``num_words`` is
+    uniform integers in [0, 1000); all are fake data standing in for real
+    measurements.
+    """
+    return {
+        "word_rep_ratios": np.random.randn(n_samples),
+        "char_rep_ratios": np.random.randn(n_samples),
+        "flagged_word_ratios": np.random.randn(n_samples),
+        "num_words": np.random.randint(0, 1000, n_samples),
+    }
+# Maps dataset name -> {statistic name -> 1-D numpy array of fake samples}.
+dataset_data = {name: _fake_stats() for name in DATASET_NAMES}
+def plt_plot(threshold, x):
+    """Draw a 50-bin histogram of ``x`` with a dashed red line at ``threshold``.
+    Args:
+        threshold: x-axis position of the vertical marker line.
+        x: values to histogram; passed unchanged to ``Axes.hist``.
+    Returns:
+        The matplotlib figure containing the histogram.
+    """
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    ax.hist(x, bins=50)
+    # Mark the threshold with a red dashed vertical line.
+    ax.axvline(threshold, color='r', linestyle='dashed', linewidth=2)
+    # Label through the Axes object instead of pyplot's implicit "current
+    # axes", which is process-wide state.
+    ax.set_title("Histogram of random data")
+    ax.set_xlabel("Value")
+    ax.set_ylabel("Frequency")
+    # Unregister the figure from pyplot so repeated calls do not accumulate
+    # open figures; the returned Figure object can still be rendered/saved.
+    plt.close(fig)
+    return fig
+def _make_plotter(metric):
+    """Return a click handler that plots ``metric`` for the selected dataset."""
+    def _plot(threshold, dataset_name):
+        # Event handlers receive the components' current *values*, so the
+        # dataset lookup has to happen here rather than while the layout is
+        # being built (where ``dataset`` is still a component object).
+        return plt_plot(threshold, dataset_data[dataset_name][metric])
+    return _plot
+with gr.Blocks() as demo:
+    dataset_names = list(dataset_data)
+    # Preselect the first dataset so a click never arrives with no selection.
+    dataset = gr.Radio(dataset_names, value=dataset_names[0], label="Dataset")
+    with gr.Tab("Character Repetition Ratio"):
+        plot = gr.Plot()
+        # NOTE(review): this slider spans 0-100 while the other tabs use 0-1;
+        # confirm the intended scale for character repetition.
+        threshold = gr.Slider(minimum=0, maximum=100, label="Threshold")
+        calculate = gr.Button("Calculate")
+        calculate.click(_make_plotter("char_rep_ratios"), [threshold, dataset], plot)
+    with gr.Tab("Word Repetition Ratio"):
+        plot = gr.Plot()
+        threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
+        calculate = gr.Button("Calculate")
+        calculate.click(_make_plotter("word_rep_ratios"), [threshold, dataset], plot)
+    with gr.Tab("Flagged Word Ratio"):
+        plot = gr.Plot()
+        threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
+        calculate = gr.Button("Calculate")
+        calculate.click(_make_plotter("flagged_word_ratios"), [threshold, dataset], plot)
 if __name__ == "__main__":
+    demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ scrubadub
2	+ squeakily