Spaces:
Sleeping
Sleeping
mj-new
committed on
Commit
•
de78526
1
Parent(s):
e283f70
Improved benchmark and data catalog info
Browse files
- __pycache__/contants.cpython-310.pyc +0 -0
- app.py +5 -7
- contants.py +18 -7
__pycache__/contants.cpython-310.pyc
CHANGED
Binary files a/__pycache__/contants.cpython-310.pyc and b/__pycache__/contants.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
|
|
2 |
import streamlit as st
|
3 |
|
4 |
from app_utils import filter_dataframe, calculate_height_to_display
|
5 |
-
from contants import INFO_CATALOG,
|
6 |
from utils import BASE_SUMMARY_METRICS
|
7 |
from utils import load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
|
8 |
from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
|
@@ -32,7 +32,7 @@ df_data_cat_available_paid = df_data_cat[(df_data_cat['Available online'] == 'ye
|
|
32 |
df_bench_cat = load_bench_catalog()
|
33 |
df_bench_tax = load_bench_taxonomy()
|
34 |
|
35 |
-
data_cat, data_survey, bench_cat, bench_survey = st.tabs(["PL ASR speech
|
36 |
|
37 |
|
38 |
with data_cat:
|
@@ -41,12 +41,13 @@ with data_cat:
|
|
41 |
st.markdown(INFO_CATALOG, unsafe_allow_html=True)
|
42 |
|
43 |
st.header("How to use?")
|
44 |
-
|
45 |
|
46 |
st.header("How to cite?")
|
47 |
-
st.
|
48 |
|
49 |
# Display catalog contents
|
|
|
50 |
st.dataframe(filter_dataframe(df_data_cat, "datasets"), hide_index=True, use_container_width=True)
|
51 |
|
52 |
# Display taxonomy contents
|
@@ -124,9 +125,6 @@ with bench_cat:
|
|
124 |
# TODO - load and display benchmarks catalog
|
125 |
st.title("Polish ASR Benchmarks Catalog")
|
126 |
|
127 |
-
st.write(WELCOME_TEXT)
|
128 |
-
|
129 |
-
st.write(CITATION_TEXT)
|
130 |
|
131 |
# Display catalog contents
|
132 |
st.dataframe(filter_dataframe(df_bench_cat, "benchmarks"), hide_index=True, use_container_width=True)
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
from app_utils import filter_dataframe, calculate_height_to_display
|
5 |
+
from contants import INFO_CATALOG, CITATION_CATALOG, HOWTO_CATALOG,INFO_BENCHMARK, CITATION_BENCHMARK, INFO_SURVEY, CITATION_SURVEY
|
6 |
from utils import BASE_SUMMARY_METRICS
|
7 |
from utils import load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
|
8 |
from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
|
|
|
32 |
df_bench_cat = load_bench_catalog()
|
33 |
df_bench_tax = load_bench_taxonomy()
|
34 |
|
35 |
+
data_cat, data_taxonomy, data_survey, bench_cat, bench_taxonomy, bench_survey = st.tabs(["PL ASR speech data **catalog**", "PL ASR speech data **survey**", "ASR speech data **taxonomy**", "PL ASR benchmarks catalog", "ASR benchmarks taxonomy", "PL ASR benchmarks survey"])
|
36 |
|
37 |
|
38 |
with data_cat:
|
|
|
41 |
st.markdown(INFO_CATALOG, unsafe_allow_html=True)
|
42 |
|
43 |
st.header("How to use?")
|
44 |
+
st.markdown(HOWTO_CATALOG, unsafe_allow_html=True)
|
45 |
|
46 |
st.header("How to cite?")
|
47 |
+
st.markdown(CITATION_CATALOG, unsafe_allow_html=True)
|
48 |
|
49 |
# Display catalog contents
|
50 |
+
st.header("Browse the catalog content")
|
51 |
st.dataframe(filter_dataframe(df_data_cat, "datasets"), hide_index=True, use_container_width=True)
|
52 |
|
53 |
# Display taxonomy contents
|
|
|
125 |
# TODO - load and display benchmarks catalog
|
126 |
st.title("Polish ASR Benchmarks Catalog")
|
127 |
|
|
|
|
|
|
|
128 |
|
129 |
# Display catalog contents
|
130 |
st.dataframe(filter_dataframe(df_bench_cat, "benchmarks"), hide_index=True, use_container_width=True)
|
contants.py
CHANGED
@@ -1,16 +1,27 @@
|
|
1 |
INFO_CATALOG = "This dashboard complements *Polish ASR Speech Datasets Catalog* available on [GitHub](https://github.com/goodmike31/pl-asr-speech-data-survey) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=0) by providing:<br> \
|
2 |
-
* More convenient browsing of the catalog content (see the
|
3 |
-
* Analysis of datasets utility for the purpose of ASR evaluation (see the
|
4 |
-
* Analysis of the state of Polish ASR speech data (see the
|
5 |
-
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
INFO_BENCHMARK = "TODO"
|
10 |
|
11 |
INFO_SURVEY = "This dashboard complements [Polish Speech Datasets Survey]"
|
12 |
|
13 |
-
CITATION_CATALOG="Please cite this work as: TODO"
|
14 |
|
15 |
CITATION_BENCHMARK="Please cite this work as: TODO"
|
16 |
|
|
|
1 |
INFO_CATALOG = "This dashboard complements *Polish ASR Speech Datasets Catalog* available on [GitHub](https://github.com/goodmike31/pl-asr-speech-data-survey) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=0) by providing:<br> \
|
2 |
+
* More convenient browsing of the catalog content (see the **How to use?** section below) <br>\
|
3 |
+
* Analysis of datasets utility for the purpose of ASR evaluation (see the **Dataset Utility Index** tab) <br>\
|
4 |
+
* Analysis of the state of Polish ASR speech data (see the **Polish ASR Speech Data Survey** tab <br><br> \
|
5 |
+
IMPORTANT - You can share your feedback [HERE](https://forms.gle/EWJ6YfbJJTyEzQs66). <br>\
|
6 |
+
Each response is granted 50 PLN for the charity of your choice. <br>\
|
7 |
+
The feedback will help to assess the state of Polish ASR speech data from the community perspective.<br>\
|
8 |
+
"
|
9 |
+
|
10 |
+
CITATION_CATALOG="Please cite this work as: <br> \
|
11 |
+
*@misc{pl-asr-speech-data-catalog, <br> \
|
12 |
+
title={Polish ASR Speech Datasets Catalog}, <br> \
|
13 |
+
author={Michał Junczyk}, <br> \
|
14 |
+
year={2023}, <br> \
|
15 |
+
publisher={Github}, <br> \
|
16 |
+
howpublished={\\url{https://github.com/goodmike31/pl-asr-speech-data-survey}}*<br>"
|
17 |
+
|
18 |
+
HOWTO_CATALOG = "You can use the filters on the left to browse the catalog content. <br> \
|
19 |
+
Please refer to the **Data Catalog Taxonomy** tab for the explanation of the columns. <br>"
|
20 |
|
21 |
INFO_BENCHMARK = "TODO"
|
22 |
|
23 |
INFO_SURVEY = "This dashboard complements [Polish Speech Datasets Survey]"
|
24 |
|
|
|
25 |
|
26 |
CITATION_BENCHMARK="Please cite this work as: TODO"
|
27 |
|