Spaces:

polinaeterna
/

text_quality_checker

Running on Zero

polinaeterna committed on Aug 29, 2024

Commit

7badbdb

1 Parent(s): 53f0290

show only text column

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,11 +45,6 @@ def predict(texts: list[str]):
     return predicted_domains
-# def progress():
-#     title = f"Scan finished" if num_rows == next_row_idx else "Scan in progress..."
 def plot_and_df(texts, preds):
     texts_df = pd.DataFrame({"quality": preds, "text": texts})
     counts = Counter(preds)
@@ -62,9 +57,9 @@ def plot_and_df(texts, preds):
     # counts.reset_index(inplace=True)
     return (
             gr.BarPlot(counts_df, x="quality", y="count"),
-            texts_df[texts_df["quality"] == "Low"][:20],
-            texts_df[texts_df["quality"] == "Medium"][:20],
-            texts_df[texts_df["quality"] == "High"][:20],
         )
@@ -80,7 +75,7 @@ def run_quality_check(dataset, column, batch_size, num_examples):
         batch_predictions = predict(batch_texts)
         predictions.extend(batch_predictions)
         texts_processed.extend(batch_texts)
-        yield {"scan in progress...": (i+batch_size) / num_examples}, *plot_and_df(texts_processed, predictions)
     yield {"finished": 1.}, *plot_and_df(texts_processed, predictions)
 with gr.Blocks() as demo:

     return predicted_domains
 def plot_and_df(texts, preds):
     texts_df = pd.DataFrame({"quality": preds, "text": texts})
     counts = Counter(preds)
     # counts.reset_index(inplace=True)
     return (
             gr.BarPlot(counts_df, x="quality", y="count"),
+            texts_df[texts_df["quality"] == "Low"][["text"]][:20],
+            texts_df[texts_df["quality"] == "Medium"][["text"]][:20],
+            texts_df[texts_df["quality"] == "High"][["text"]][:20],
         )
         batch_predictions = predict(batch_texts)
         predictions.extend(batch_predictions)
         texts_processed.extend(batch_texts)
+        yield {"check in progress...": (i+batch_size) / num_examples}, *plot_and_df(texts_processed, predictions)
     yield {"finished": 1.}, *plot_and_df(texts_processed, predictions)
 with gr.Blocks() as demo: