Spaces:
Sleeping
Sleeping
mj-new
committed on
Commit
•
4d749f0
1
Parent(s):
a958ea3
Added taxonomy tab for data catalog
Browse files
- __pycache__/contants.cpython-310.pyc +0 -0
- app.py +10 -1
- contants.py +4 -0
__pycache__/contants.cpython-310.pyc
CHANGED
Binary files a/__pycache__/contants.cpython-310.pyc and b/__pycache__/contants.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
|
|
2 |
import streamlit as st
|
3 |
|
4 |
from app_utils import filter_dataframe, calculate_height_to_display
|
5 |
-
from contants import INFO_CATALOG, CITATION_CATALOG, HOWTO_CATALOG,INFO_BENCHMARK, CITATION_BENCHMARK, HOWTO_BENCHMARK, INFO_MAIN, CITATION_MAIN
|
6 |
from utils import BASE_SUMMARY_METRICS
|
7 |
from utils import load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
|
8 |
from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
|
@@ -126,6 +126,15 @@ with data_survey:
|
|
126 |
df_datasets_per_device = datasets_count_and_size(df_data_cat_available_paid, col_groupby, col_sort=col_groupby, col_percent = ['Size audio transcribed [hours]'], col_sum = ['Size audio transcribed [hours]','Audio recordings', 'Speakers'], col_count = ['Dataset ID'])
|
127 |
st.dataframe(df_datasets_per_device, use_container_width=False)
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
with bench_cat:
|
130 |
st.write("Benchmarks catalog")
|
131 |
# TODO - load and display benchmarks catalog
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
from app_utils import filter_dataframe, calculate_height_to_display
|
5 |
+
from contants import INFO_CATALOG, CITATION_CATALOG, HOWTO_CATALOG,INFO_BENCHMARK, CITATION_BENCHMARK, HOWTO_BENCHMARK, INFO_MAIN, CITATION_MAIN, HOWTO_TAXONOMY_CAT
|
6 |
from utils import BASE_SUMMARY_METRICS
|
7 |
from utils import load_data_catalog, load_data_taxonomy, load_bench_catalog, load_bench_taxonomy
|
8 |
from utils import datasets_count_and_size, datasets_count_and_size_standard, metadata_coverage, catalog_summary_statistics
|
|
|
126 |
df_datasets_per_device = datasets_count_and_size(df_data_cat_available_paid, col_groupby, col_sort=col_groupby, col_percent = ['Size audio transcribed [hours]'], col_sum = ['Size audio transcribed [hours]','Audio recordings', 'Speakers'], col_count = ['Dataset ID'])
|
127 |
st.dataframe(df_datasets_per_device, use_container_width=False)
|
128 |
|
129 |
+
with data_taxonomy:
|
130 |
+
st.title("Polish ASR Speech Data Taxonomy")
|
131 |
+
st.header("How to use?")
|
132 |
+
st.markdown(HOWTO_TAXONOMY_CAT, unsafe_allow_html=True)
|
133 |
+
st.dataframe(df_data_tax, hide_index=True, use_container_width=True)
|
134 |
+
st.header("How to cite?")
|
135 |
+
st.markdown(CITATION_CATALOG, unsafe_allow_html=True)
|
136 |
+
|
137 |
+
|
138 |
with bench_cat:
|
139 |
st.write("Benchmarks catalog")
|
140 |
# TODO - load and display benchmarks catalog
|
contants.py
CHANGED
@@ -52,6 +52,10 @@ Depending on the column type, you can use the search box to filter the content.
|
|
52 |
Please refer to the **ASR Speech Data Taxonomy** tab for the explanation of the columns. <br> \
|
53 |
If you looking for insights derived from the collected in the catalog, please go to **Polish ASR Speech Data Survey** tab. <br>"
|
54 |
|
|
|
|
|
|
|
|
|
55 |
############################################################################################################
|
56 |
INFO_BENCHMARK = "TODO"
|
57 |
|
|
|
52 |
Please refer to the **ASR Speech Data Taxonomy** tab for the explanation of the columns. <br> \
|
53 |
If you looking for insights derived from the collected in the catalog, please go to **Polish ASR Speech Data Survey** tab. <br>"
|
54 |
|
55 |
+
|
56 |
+
HOWTO_TAXONOMY_CAT = "This table presents descriptors (columns) used in the *Polish ASR Speech Datasets Catalog* <br> \
|
57 |
+
Taxonomy is also provided on [GitHub as TSV file](https://github.com/goodmike31/pl-asr-speech-data-survey/blob/main/snapshots/pl-asr-speech-datasets-catalog-latest.tsv) and [Google Sheets](https://docs.google.com/spreadsheets/d/181EDfwZNtHgHFOMaKNtgKssrYDX4tXTJ9POMzBsCRlI/edit#gid=2015613057)"
|
58 |
+
|
59 |
############################################################################################################
|
60 |
INFO_BENCHMARK = "TODO"
|
61 |
|