Spaces:

amu-cai
/

pl-asr-survey

Sleeping

App Files Files Community

mj-new committed on Mar 9

Commit

de78526

•

1 Parent(s): e283f70

Improved benchmark and data catalog info

Browse files

Files changed (3) hide show

__pycache__/contants.cpython-310.pyc +0 -0
app.py +5 -7
contants.py +18 -7

__pycache__/contants.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/contants.cpython-310.pyc and b/__pycache__/contants.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import pandas as pd
 import streamlit as st
 from app_utils import filter_dataframe, calculate_height_to_display
-from contants import INFO_CATALOG, INFO_BENCHMARK, INFO_SURVEY, CITATION_CATALOG, CITATION_BENCHMARK, CITATION_SURVEY
 from utils import BASE_SUMMARY_METRICS
 from utils import  load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
 from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
@@ -32,7 +32,7 @@ df_data_cat_available_paid = df_data_cat[(df_data_cat['Available online'] == 'ye
 df_bench_cat = load_bench_catalog()
 df_bench_tax = load_bench_taxonomy()
-data_cat, data_survey, bench_cat, bench_survey = st.tabs(["PL ASR speech datasets catalog", "PL ASR speech data survey", "PL ASR benchmarks catalog", "PL ASR benchmarks survey"])
 with data_cat:
@@ -41,12 +41,13 @@ with data_cat:
     st.markdown(INFO_CATALOG, unsafe_allow_html=True)
     st.header("How to use?")
-    #    sent = st.text_area("Text", WELCOME_TEXT, height = 275)
     st.header("How to cite?")
-    st.code(CITATION_CATALOG)
     # Display catalog contents
     st.dataframe(filter_dataframe(df_data_cat, "datasets"), hide_index=True, use_container_width=True)
     # Display taxonomy contents
@@ -124,9 +125,6 @@ with bench_cat:
     # TODO - load and display benchmarks catalog
     st.title("Polish ASR Benchmarks Catalog")
-    st.write(WELCOME_TEXT)
-    st.write(CITATION_TEXT)
     # Display catalog contents
     st.dataframe(filter_dataframe(df_bench_cat, "benchmarks"), hide_index=True, use_container_width=True)

 import streamlit as st
 from app_utils import filter_dataframe, calculate_height_to_display
+from contants import INFO_CATALOG, CITATION_CATALOG, HOWTO_CATALOG,INFO_BENCHMARK, CITATION_BENCHMARK, INFO_SURVEY, CITATION_SURVEY
 from utils import BASE_SUMMARY_METRICS
 from utils import  load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
 from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
 df_bench_cat = load_bench_catalog()
 df_bench_tax = load_bench_taxonomy()
+data_cat, data_taxonomy, data_survey, bench_cat, bench_taxonomy, bench_survey = st.tabs(["PL ASR speech data **catalog**", "ASR speech data **taxonomy**", "PL ASR speech data **survey**", "PL ASR benchmarks catalog", "ASR benchmarks taxonomy", "PL ASR benchmarks survey"])
 with data_cat:
     st.markdown(INFO_CATALOG, unsafe_allow_html=True)
     st.header("How to use?")
+    st.markdown(HOWTO_CATALOG, unsafe_allow_html=True)
     st.header("How to cite?")
+    st.markdown(CITATION_CATALOG, unsafe_allow_html=True)
     # Display catalog contents
+    st.header("Browse the catalog content")
     st.dataframe(filter_dataframe(df_data_cat, "datasets"), hide_index=True, use_container_width=True)
     # Display taxonomy contents
     # TODO - load and display benchmarks catalog
     st.title("Polish ASR Benchmarks Catalog")
     # Display catalog contents
     st.dataframe(filter_dataframe(df_bench_cat, "benchmarks"), hide_index=True, use_container_width=True)

contants.py CHANGED Viewed

@@ -1,16 +1,27 @@
 INFO_CATALOG = "This dashboard complements *Polish ASR Speech Datasets Catalog* available on [GitHub](https://github.com/goodmike31/pl-asr-speech-data-survey) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=0) by providing:<br> \
-* More convenient browsing of the catalog content (see the *How to use?* section below) <br>\
-* Analysis of datasets utility for the purpose of ASR evaluation (see the *Dataset Utility Index* tab) <br>\
-* Analysis of the state of Polish ASR speech data (see the *Polish ASR Speech Data Survey* tab <br> \
-IMPORANT - Please share your feedback [HERE](https://forms.gle/EWJ6YfbJJTyEzQs66). <br>\
-Your feedback will help to assess the state of Polish ASR speech data from the community perspective.<br>\
-Each response is granted 50 PLN for the charity of choice."
 INFO_BENCHMARK = "TODO"
 INFO_SURVEY = "This dashboard complements [Polish Speech Datasets Survey]"
-CITATION_CATALOG="Please cite this work as: TODO"
 CITATION_BENCHMARK="Please cite this work as: TODO"

 INFO_CATALOG = "This dashboard complements *Polish ASR Speech Datasets Catalog* available on [GitHub](https://github.com/goodmike31/pl-asr-speech-data-survey) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=0) by providing:<br> \
+* More convenient browsing of the catalog content (see the **How to use?** section below) <br>\
+* Analysis of datasets utility for the purpose of ASR evaluation (see the **Dataset Utility Index** tab) <br>\
+* Analysis of the state of Polish ASR speech data (see the **Polish ASR Speech Data Survey** tab) <br><br> \
+IMPORTANT - You can share your feedback [HERE](https://forms.gle/EWJ6YfbJJTyEzQs66). <br>\
+Each response is granted 50 PLN for the charity of your choice. <br>\
+The feedback will help to assess the state of Polish ASR speech data from the community perspective.<br>\
+"
+CITATION_CATALOG="Please cite this work as: <br> \
+*@misc{pl-asr-speech-data-catalog, <br> \
+  title={Polish ASR Speech Datasets Catalog}, <br> \
+  author={Michał Junczyk}, <br> \
+  year={2023}, <br> \
+  publisher={GitHub}, <br> \
+  howpublished={\\url{https://github.com/goodmike31/pl-asr-speech-data-survey}}<br>}*<br>"
+HOWTO_CATALOG = "You can use the filters on the left to browse the catalog content. <br> \
+Please refer to the **Data Catalog Taxonomy** tab for the explanation of the columns. <br>"
 INFO_BENCHMARK = "TODO"
 INFO_SURVEY = "This dashboard complements [Polish Speech Datasets Survey]"
 CITATION_BENCHMARK="Please cite this work as: TODO"