Add flagging to Google Form

Browse files

Files changed (11) hide show

app.py +76 -3
bad_examples/bigcode_python_code_bad_examples.jsonl +3 -0
bad_examples/bigcode_python_github_issues_bad_examples.jsonl +3 -0
bad_examples/bigcode_python_jupyter_scripts_dedup_filtered_bad_examples.jsonl +3 -0
bad_examples/books3_bad_examples.jsonl +3 -0
bad_examples/c4_bad_examples.jsonl +2 -2
bad_examples/gutenberg_raw_bad_examples.jsonl +2 -2
bad_examples/reddit_threaded_bad_examples.jsonl +3 -0
bad_examples/s2orc_raw_bad_examples.jsonl +3 -0
bad_examples/stackexchange2_bad_examples.jsonl +2 -2
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,10 +1,61 @@
 import json
 import streamlit as st
 import streamlit.components.v1 as components
 BAD_EXAMPLES_PATH = "bad_examples"
 DATA_PATH = "data"
 def load_jsonl(file_path):
@@ -24,12 +75,33 @@ def get_next_item():
     st.session_state.idx += 1
-def save_and_get_next_item(sample, issue):
     sample["issue"] = issue
     with open(f"{BAD_EXAMPLES_PATH}/{dataset}_bad_examples.jsonl", "a") as f:
         f.write(json.dumps(sample) + "\n")
     get_next_item()
@@ -76,10 +148,11 @@ with st.form(key="bad_form", clear_on_submit=True):
     )
     good = st.form_submit_button(
-        "GOOD", on_click=get_next_item,
     )
     bad = st.form_submit_button(
         "BAD",
-        on_click=save_and_get_next_item,
         args=(sample, issue),
     )

 import json
+import math
+from functools import partial
 import streamlit as st
 import streamlit.components.v1 as components
+from gforms import Form
 BAD_EXAMPLES_PATH = "bad_examples"
 DATA_PATH = "data"
+MAX_DOC_LENGTH = 30000
def form_callback(
    element,
    page_index,
    element_index,
    dataset,
    docid,
    text,
    metadata,
    reason,
    person,
    part,
):
    """Return the answer for one form element, selected by the element's name.

    The first three parameters form the callback signature; the remaining
    ones carry the answer values and are expected to be pre-bound (for
    example with ``functools.partial``) before the callback is handed to the
    form-filling code.

    Args:
        element: Form element being filled; only its ``name`` attribute is read.
        page_index: Unused; kept for the callback signature.
        element_index: Unused; kept for the callback signature.
        dataset: Answer for the "Dataset" element.
        docid: Answer for the "Datapoint ID" element.
        text: Answer for the "Text" element.
        metadata: Answer for the "Metadata" element.
        reason: Answer for the "Flagging Reason" element.
        person: Answer for the "Flagging Person" element.
        part: Answer for the "Part" element.

    Returns:
        The answer whose field name equals ``element.name``, or ``None`` when
        the name is not one of the seven known fields.
    """
    # One lookup table instead of a chain of equality tests; the keys are the
    # exact question titles the form is expected to contain.
    answers = {
        "Dataset": dataset,
        "Datapoint ID": docid,
        "Text": text,
        "Metadata": metadata,
        "Flagging Reason": reason,
        "Flagging Person": person,
        "Part": part,
    }
    return answers.get(element.name)
def report_result(
    dataset, docid, text, metadata, reason, person, part, form_url=None
):
    """Submit one flagged datapoint (or one part of it) to the Google Form.

    Loads the form, fills every element through ``form_callback`` with the
    given values bound as keyword arguments, and submits it. The form is
    loaded again on every call.

    Args:
        dataset: Value for the "Dataset" field.
        docid: Value for the "Datapoint ID" field.
        text: Value for the "Text" field.
        metadata: Value for the "Metadata" field.
        reason: Value for the "Flagging Reason" field.
        person: Value for the "Flagging Person" field.
        part: Value for the "Part" field.
        form_url: Optional URL of the form to submit to. When omitted, the
            project's flagging form is used, exactly as before this
            parameter existed.
    """
    if form_url is None:
        form_url = "https://docs.google.com/forms/d/e/1FAIpQLSedYTj1pBD5L6xo6qPUKY5vleNW183FXCgc3LSSgg3AUwZWKA/viewform"

    form = Form()
    form.load(form_url)
    # Bind the answers by keyword so the callback only receives the element
    # and its indices positionally from the form-filling code.
    fill_answers = partial(
        form_callback,
        dataset=dataset,
        docid=docid,
        text=text,
        metadata=metadata,
        reason=reason,
        person=person,
        part=part,
    )
    form.fill(fill_answers)
    form.submit()
 def load_jsonl(file_path):
     st.session_state.idx += 1
def save_flag_and_get_next_item(sample, issue):
    """Record a flagged sample locally and in the Google Form, then advance.

    Appends the sample (with the issue attached) as one JSON line to the
    dataset's bad-examples file, reports it through ``report_result`` in
    chunks of at most ``MAX_DOC_LENGTH`` characters, and finally calls
    ``get_next_item()``. ``dataset`` is read from module scope.

    The caller's ``sample`` dict is not modified; the record written to disk
    and the metadata sent to the form are built from copies.

    Args:
        sample: Mapping describing the datapoint; must contain a "text" key.
            An "id" key (or, failing that, a "title" key) is used as the
            datapoint identifier; otherwise the identifier is "".
        issue: The flagging reason chosen by the reviewer.

    Raises:
        KeyError: If ``sample`` has no "text" key (raised after the local
            JSONL line has already been written).
    """
    flagged = {**sample, "issue": issue}
    with open(f"{BAD_EXAMPLES_PATH}/{dataset}_bad_examples.jsonl", "a") as f:
        f.write(json.dumps(flagged) + "\n")

    text = sample["text"]
    # Everything except the document text and the issue is sent as metadata.
    metadata = str(
        {key: value for key, value in sample.items() if key not in ("text", "issue")}
    )
    sample_id = sample.get("id", sample.get("title", ""))

    # Long documents are sent as several numbered parts; an empty or short
    # document is still reported once, as part "0".
    num_parts = max(1, math.ceil(len(text) / MAX_DOC_LENGTH))
    for part in range(num_parts):
        start = part * MAX_DOC_LENGTH
        report_result(
            dataset,
            sample_id,
            text[start : start + MAX_DOC_LENGTH],
            metadata,
            issue,
            "",
            str(part),
        )
    get_next_item()
     )
     good = st.form_submit_button(
+        "GOOD",
+        on_click=get_next_item,
     )
     bad = st.form_submit_button(
         "BAD",
+        on_click=save_flag_and_get_next_item,
         args=(sample, issue),
     )

bad_examples/bigcode_python_code_bad_examples.jsonl CHANGED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deca29f2463d96422b301c1ca4af444e1f1dad66764a2835db243fd1a7abc3c3
+size 3250

bad_examples/bigcode_python_github_issues_bad_examples.jsonl CHANGED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bfc92a7f740e92393f314bac702965dd47a8084bd093d63632865fda5bb11b0
+size 2876

bad_examples/bigcode_python_jupyter_scripts_dedup_filtered_bad_examples.jsonl CHANGED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3184586d973ef844d86995c33d1439dfeef8faca7813a0cdd80f0d22ca9d84fa
+size 7802

bad_examples/books3_bad_examples.jsonl CHANGED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d011167cb7679eb46b595af10c2965efa2e36ce8085f2c6fe8a7c5d3a28e54d0
+size 452432

bad_examples/c4_bad_examples.jsonl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1d2500082179deff6c62072e3937f3b432f5615eaea968602f59754eb5cd69d
-size 3314

 version https://git-lfs.github.com/spec/v1
+oid sha256:2415a6bc59f376c1535f1eb2c6854b9d13a75842675b3d8231e5d81999d865b2
+size 8618

bad_examples/gutenberg_raw_bad_examples.jsonl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f116395c3f0c07973218d81c31fb2bf59c44b8b4d8f4e8a97a6228656c3a3d93
-size 145658

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d4a0bce4796569315e6af2b9f2313204d5cf108b21b69bb01a27a98b56ff643
+size 2394572

bad_examples/reddit_threaded_bad_examples.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90cccebb836615224b151fe1576ad3667933d425bc16e0e8f231671e151b0dbb
+size 2971

bad_examples/s2orc_raw_bad_examples.jsonl CHANGED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dc4d3ee6ca348b2cd56294e65ff268c73905aec89856e4645bfa4aea108d573
+size 15219

bad_examples/stackexchange2_bad_examples.jsonl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc14cd72933da3cae9553adf26702026ac27d4895bebe994a2df3bd21f612b68
-size 40469

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d56866a48e1cd99a0bc80ab9088bf7f28e7a861d91a02630252f8fad676147b
+size 41965

requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@

1	streamlit==1.20.0


1	+ gforms
2	streamlit==1.20.0