the-stack-inspection

Running

loubnabnl HF staff committed on Feb 14, 2023

Commit

353f3d1

•

1 Parent(s): 34c0fa2

update filtering

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ all_languages = list(tags.keys())
-@st.cache()
 def load_data(language, ext):
     ds = load_dataset(
         "loubnabnl/the-stack-inspection-data",
@@ -41,18 +41,17 @@ st.sidebar.header("Filters")
 not_lexable = st.sidebar.checkbox("Not lexable")
 min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 1.0)
 max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 0)
-max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 2000, 0)
 st.sidebar.markdown("Printed files have `max_line_length`  and `average_line_length` larger than the selected values.\
 `alphanumeric_fraction` is smaller than the selected value.")
 # load and filter dataset
 samples = load_data(chosen_language, chosen_ext)
-samples = samples.filter(
-    lambda x: x["alphanum_fraction"] < min_alphanum
-    and x["max_line_length"] > max_line_length
-    and x["avg_line_length"] > max_mean_line_length
-)
 if not_lexable:
     samples = samples.filter(lambda x: not x["lexable"])

+@st.cache_data()
 def load_data(language, ext):
     ds = load_dataset(
         "loubnabnl/the-stack-inspection-data",
 not_lexable = st.sidebar.checkbox("Not lexable")
 min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 1.0)
 max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 0)
+max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 500, 0)
 st.sidebar.markdown("Printed files have `max_line_length`  and `average_line_length` larger than the selected values.\
 `alphanumeric_fraction` is smaller than the selected value.")
 # load and filter dataset
 samples = load_data(chosen_language, chosen_ext)
+samples = samples.filter(lambda x: x["alphanum_fraction"] < min_alphanum)
+samples = samples.filter(lambda x: x["max_line_length"] > max_line_length)
+samples = samples.filter(lambda x: x["avg_line_length"] > max_mean_line_length)
 if not_lexable:
     samples = samples.filter(lambda x: not x["lexable"])