mj-new committed on
Commit
de78526
1 Parent(s): e283f70

Improved benchmark and data catalog info

Browse files
Files changed (3) hide show
  1. __pycache__/contants.cpython-310.pyc +0 -0
  2. app.py +5 -7
  3. contants.py +18 -7
__pycache__/contants.cpython-310.pyc CHANGED
Binary files a/__pycache__/contants.cpython-310.pyc and b/__pycache__/contants.cpython-310.pyc differ
 
app.py CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
2
  import streamlit as st
3
 
4
  from app_utils import filter_dataframe, calculate_height_to_display
5
- from contants import INFO_CATALOG, INFO_BENCHMARK, INFO_SURVEY, CITATION_CATALOG, CITATION_BENCHMARK, CITATION_SURVEY
6
  from utils import BASE_SUMMARY_METRICS
7
  from utils import load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
8
  from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
@@ -32,7 +32,7 @@ df_data_cat_available_paid = df_data_cat[(df_data_cat['Available online'] == 'ye
32
  df_bench_cat = load_bench_catalog()
33
  df_bench_tax = load_bench_taxonomy()
34
 
35
- data_cat, data_survey, bench_cat, bench_survey = st.tabs(["PL ASR speech datasets catalog", "PL ASR speech data survey", "PL ASR benchmarks catalog", "PL ASR benchmarks survey"])
36
 
37
 
38
  with data_cat:
@@ -41,12 +41,13 @@ with data_cat:
41
  st.markdown(INFO_CATALOG, unsafe_allow_html=True)
42
 
43
  st.header("How to use?")
44
- # sent = st.text_area("Text", WELCOME_TEXT, height = 275)
45
 
46
  st.header("How to cite?")
47
- st.code(CITATION_CATALOG)
48
 
49
  # Display catalog contents
 
50
  st.dataframe(filter_dataframe(df_data_cat, "datasets"), hide_index=True, use_container_width=True)
51
 
52
  # Display taxonomy contents
@@ -124,9 +125,6 @@ with bench_cat:
124
  # TODO - load and display benchmarks catalog
125
  st.title("Polish ASR Benchmarks Catalog")
126
 
127
- st.write(WELCOME_TEXT)
128
-
129
- st.write(CITATION_TEXT)
130
 
131
  # Display catalog contents
132
  st.dataframe(filter_dataframe(df_bench_cat, "benchmarks"), hide_index=True, use_container_width=True)
 
2
  import streamlit as st
3
 
4
  from app_utils import filter_dataframe, calculate_height_to_display
5
+ from contants import INFO_CATALOG, CITATION_CATALOG, HOWTO_CATALOG,INFO_BENCHMARK, CITATION_BENCHMARK, INFO_SURVEY, CITATION_SURVEY
6
  from utils import BASE_SUMMARY_METRICS
7
  from utils import load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
8
  from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
 
32
  df_bench_cat = load_bench_catalog()
33
  df_bench_tax = load_bench_taxonomy()
34
 
35
+ data_cat, data_taxonomy, data_survey, bench_cat, bench_taxonomy, bench_survey = st.tabs(["PL ASR speech data **catalog**", "ASR speech data **taxonomy**", "PL ASR speech data **survey**", "PL ASR benchmarks catalog", "ASR benchmarks taxonomy", "PL ASR benchmarks survey"])
36
 
37
 
38
  with data_cat:
 
41
  st.markdown(INFO_CATALOG, unsafe_allow_html=True)
42
 
43
  st.header("How to use?")
44
+ st.markdown(HOWTO_CATALOG, unsafe_allow_html=True)
45
 
46
  st.header("How to cite?")
47
+ st.markdown(CITATION_CATALOG, unsafe_allow_html=True)
48
 
49
  # Display catalog contents
50
+ st.header("Browse the catalog content")
51
  st.dataframe(filter_dataframe(df_data_cat, "datasets"), hide_index=True, use_container_width=True)
52
 
53
  # Display taxonomy contents
 
125
  # TODO - load and display benchmarks catalog
126
  st.title("Polish ASR Benchmarks Catalog")
127
 
 
 
 
128
 
129
  # Display catalog contents
130
  st.dataframe(filter_dataframe(df_bench_cat, "benchmarks"), hide_index=True, use_container_width=True)
contants.py CHANGED
@@ -1,16 +1,27 @@
1
  INFO_CATALOG = "This dashboard complements *Polish ASR Speech Datasets Catalog* available on [GitHub](https://github.com/goodmike31/pl-asr-speech-data-survey) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=0) by providing:<br> \
2
- * More convenient browsing of the catalog content (see the *How to use?* section below) <br>\
3
- * Analysis of datasets utility for the purpose of ASR evaluation (see the *Dataset Utility Index* tab) <br>\
4
- * Analysis of the state of Polish ASR speech data (see the *Polish ASR Speech Data Survey* tab <br> \
5
- IMPORANT - Please share your feedback [HERE](https://forms.gle/EWJ6YfbJJTyEzQs66). <br>\
6
- Your feedback will help to assess the state of Polish ASR speech data from the community perspective.<br>\
7
- Each response is granted 50 PLN for the charity of choice."
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  INFO_BENCHMARK = "TODO"
10
 
11
  INFO_SURVEY = "This dashboard complements [Polish Speech Datasets Survey]"
12
 
13
- CITATION_CATALOG="Please cite this work as: TODO"
14
 
15
  CITATION_BENCHMARK="Please cite this work as: TODO"
16
 
 
1
  INFO_CATALOG = "This dashboard complements *Polish ASR Speech Datasets Catalog* available on [GitHub](https://github.com/goodmike31/pl-asr-speech-data-survey) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=0) by providing:<br> \
2
+ * More convenient browsing of the catalog content (see the **How to use?** section below) <br>\
3
+ * Analysis of datasets utility for the purpose of ASR evaluation (see the **Dataset Utility Index** tab) <br>\
4
+ * Analysis of the state of Polish ASR speech data (see the **Polish ASR Speech Data Survey** tab) <br><br> \
5
+ IMPORTANT - You can share your feedback [HERE](https://forms.gle/EWJ6YfbJJTyEzQs66). <br>\
6
+ Each response is granted 50 PLN for the charity of your choice. <br>\
7
+ The feedback will help to assess the state of Polish ASR speech data from the community perspective.<br>\
8
+ "
9
+
10
+ CITATION_CATALOG="Please cite this work as: <br> \
11
+ *@misc{pl-asr-speech-data-catalog, <br> \
12
+ title={Polish ASR Speech Datasets Catalog}, <br> \
13
+ author={Michał Junczyk}, <br> \
14
+ year={2023}, <br> \
15
+ publisher={Github}, <br> \
16
+ howpublished={\\url{https://github.com/goodmike31/pl-asr-speech-data-survey}}}*<br>"
17
+
18
+ HOWTO_CATALOG = "You can use the filters on the left to browse the catalog content. <br> \
19
+ Please refer to the **Data Catalog Taxonomy** tab for the explanation of the columns. <br>"
20
 
21
  INFO_BENCHMARK = "TODO"
22
 
23
  INFO_SURVEY = "This dashboard complements [Polish Speech Datasets Survey]"
24
 
 
25
 
26
  CITATION_BENCHMARK="Please cite this work as: TODO"
27