loubnabnl HF staff committed on
Commit
353f3d1
1 Parent(s): 34c0fa2

update filtering

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -18,7 +18,7 @@ all_languages = list(tags.keys())
18
 
19
 
20
 
21
- @st.cache()
22
  def load_data(language, ext):
23
  ds = load_dataset(
24
  "loubnabnl/the-stack-inspection-data",
@@ -41,18 +41,17 @@ st.sidebar.header("Filters")
41
  not_lexable = st.sidebar.checkbox("Not lexable")
42
  min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 1.0)
43
  max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 0)
44
- max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 2000, 0)
45
  st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
46
  `alphanumeric_fraction` is smaller than the selected value.")
47
 
48
  # load and filter dataset
49
  samples = load_data(chosen_language, chosen_ext)
50
 
51
- samples = samples.filter(
52
- lambda x: x["alphanum_fraction"] < min_alphanum
53
- and x["max_line_length"] > max_line_length
54
- and x["avg_line_length"] > max_mean_line_length
55
- )
56
  if not_lexable:
57
  samples = samples.filter(lambda x: not x["lexable"])
58
 
 
18
 
19
 
20
 
21
+ @st.cache_data()
22
  def load_data(language, ext):
23
  ds = load_dataset(
24
  "loubnabnl/the-stack-inspection-data",
 
41
  not_lexable = st.sidebar.checkbox("Not lexable")
42
  min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 1.0)
43
  max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 0)
44
+ max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 500, 0)
45
  st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
46
  `alphanumeric_fraction` is smaller than the selected value.")
47
 
48
  # load and filter dataset
49
  samples = load_data(chosen_language, chosen_ext)
50
 
51
+ samples = samples.filter(lambda x: x["alphanum_fraction"] < min_alphanum)
52
+ samples = samples.filter(lambda x: x["max_line_length"] > max_line_length)
53
+ samples = samples.filter(lambda x: x["avg_line_length"] > max_mean_line_length)
54
+
 
55
  if not_lexable:
56
  samples = samples.filter(lambda x: not x["lexable"])
57