Spaces:
Build error
Build error
Merge branch 'main' of https://huggingface.co/spaces/CarperAI/pile-v2-eda
Browse files
app.py
CHANGED
@@ -14,7 +14,7 @@ else:
|
|
14 |
contribution_json = "contributors.json"
|
15 |
|
16 |
contribution_dict = json.load(open(contribution_json,"r"))
|
17 |
-
IGNORE_LIST = ["Bible","Tanzil",""]
|
18 |
|
19 |
splits = [split for split in os.listdir(CACHE_DIR) if split not in IGNORE_LIST]
|
20 |
|
@@ -44,17 +44,18 @@ def load_page(split):
|
|
44 |
meta = data["meta"]
|
45 |
with st.expander("Render Content"):
|
46 |
st.write(content)
|
47 |
-
st.
|
48 |
-
|
49 |
-
st.
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
54 |
#Word related count
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
|
59 |
|
60 |
demo_name = st.sidebar.selectbox("Choose a demo", splits)
|
|
|
14 |
contribution_json = "contributors.json"
|
15 |
|
16 |
contribution_dict = json.load(open(contribution_json,"r"))
|
17 |
+
IGNORE_LIST = ["Bible","Tanzil","GNOME"]
|
18 |
|
19 |
splits = [split for split in os.listdir(CACHE_DIR) if split not in IGNORE_LIST]
|
20 |
|
|
|
44 |
meta = data["meta"]
|
45 |
with st.expander("Render Content"):
|
46 |
st.write(content)
|
47 |
+
with st.expander("Raw Content"):
|
48 |
+
st.text(content)
|
49 |
+
with st.expander("Metadata and Metrics"):
|
50 |
+
st.write("### Meta:")
|
51 |
+
st.write(ast.literal_eval(meta))
|
52 |
+
# Tokenizer-related count
|
53 |
+
tokenized = tokenizer(content, return_length=True)['length'][0]
|
54 |
+
token_count_metric = st.metric("Token Count(compared to 2048)",value=tokenized,delta=4096-tokenized)
|
55 |
#Word related count
|
56 |
+
split_words = re.findall(r'\w+', content)
|
57 |
+
word_count_metric = st.metric("Word Count",value=len(split_words))
|
58 |
+
|
59 |
|
60 |
|
61 |
demo_name = st.sidebar.selectbox("Choose a demo", splits)
|