# Spaces: Running
import streamlit as st | |
from datasets import load_dataset | |
import pandas as pd | |
import plotly.graph_objects as go | |
from transformers import pipeline | |
# Streamlit re-executes the whole script on every widget interaction, so the
# dataset load is cached. The one-hour TTL lets newly contributed rows show up
# without having to restart the app.
@st.cache_data(ttl=3600)
def fetch_counts() -> dict:
    """Count the rows of the ``atlasia/darija-translation`` train split.

    Returns:
        A dict with three integer entries:
        ``"n_eng"`` - rows whose ``"en"`` column is not null,
        ``"n_fr"``  - rows whose ``"fr"`` column is not null,
        ``"n"``     - total number of rows.
    """
    dataset = load_dataset("atlasia/darija-translation", split="train")
    frame = pd.DataFrame(dataset)
    return {
        "n_eng": len(frame["en"].dropna()),
        "n_fr": len(frame["fr"].dropna()),
        "n": len(frame),
    }
@st.cache_resource
def _terjman_pipeline():
    """Build the ``atlasia/Terjman-Large`` text2text pipeline once per process.

    Cached as a Streamlit resource so the model is not reloaded on every
    translation request or script rerun.
    NOTE(review): the cached pipeline object is shared by all sessions;
    confirm concurrent calls on it are acceptable for this deployment.
    """
    return pipeline("text2text-generation", model="atlasia/Terjman-Large")


def terjman(input_text: str) -> str:
    """Translate ``input_text`` with the Terjman-Large model.

    Args:
        input_text: Text handed to the text2text-generation pipeline.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    result = _terjman_pipeline()(input_text, max_length=512)
    return result[0]["generated_text"]
@st.cache_resource
def _transliteration_pipeline():
    """Build the ``atlasia/Transliteration-Moroccan-Darija`` pipeline once.

    Cached as a Streamlit resource so the model is not reloaded on every
    call or script rerun.
    NOTE(review): the cached pipeline object is shared by all sessions;
    confirm concurrent calls on it are acceptable for this deployment.
    """
    return pipeline(
        "text2text-generation",
        model="atlasia/Transliteration-Moroccan-Darija",
    )


def transliterate(input_text: str) -> str:
    """Transliterate ``input_text`` with the Moroccan-Darija model.

    Args:
        input_text: Text handed to the text2text-generation pipeline.

    Returns:
        The ``"generated_text"`` field of the first pipeline result
        (generation is capped at ``max_length=50``).
    """
    result = _transliteration_pipeline()(input_text, max_length=50)
    return result[0]["generated_text"]
# Upper bound of the progress gauge: the targeted number of translations.
N_GOAL = 100000
# Value the gauge's delta is measured against.
# NOTE(review): presumably an earlier dataset size - confirm with the authors.
DELTA_REFERENCE = 42000


def _render_banner():
    """Show the centered call-to-action that links to the contribution site."""
    st.text("")  # blank line used as a vertical spacer
    st.markdown(
        """
        <h1 style='text-align: center; font-size: 20px;'>
        Contribute now to help build a better Darija dataset for all Moroccans
        Contribute here: <a href="https://atlasia.ma" target="_blank">https://atlasia.ma</a>
        </h1>
        """,
        unsafe_allow_html=True,
    )
    st.divider()


def _render_translator():
    """Render the English-to-Darija form and, once requested, its result."""
    with st.container():
        st.markdown(
            """
            <div style='text-align: center;'>
            <h3>💬Terjman: Translate to Darija</h3
            >
            </div>
            """,
            unsafe_allow_html=True,
        )
        st.caption(
            """
            <div style='text-align: center;'>
            <h3>This model has been developed thanks to your contributions.
            While it's not perfect yet, your continued input is key for making it better.</h3>
            </div>
            """,
            unsafe_allow_html=True,
        )
        col1, col2 = st.columns(2)
        with col1:
            input_text = st.text_area(":grey[Enter a sentence in English ⬇]", "")
            button = st.button("Translate")
        with col2:
            if button:
                if input_text.strip():
                    with st.spinner("Translating..."):
                        translation = terjman(input_text)
                    st.text_area(
                        ":grey[Translation in Darija]",
                        translation,
                    )
                else:
                    # Do not run the model on a blank text box.
                    st.warning("Please enter a sentence to translate.")


def _render_statistics(counts):
    """Draw the progress gauge and the English/French pie chart.

    Args:
        counts: Mapping with the keys ``"n"``, ``"n_eng"`` and ``"n_fr"``,
            as returned by ``fetch_counts``.
    """
    total_submissions = counts["n"]
    st.markdown(
        """
        <div style='text-align: center;'>
        <h3>📊 Data statistics</h3
        >
        </div>
        """,
        unsafe_allow_html=True,
    )

    # Progress gauge: current row count on a 0..N_GOAL axis.
    gauge = go.Figure(
        go.Indicator(
            domain={"x": [0, 1], "y": [0, 1]},
            value=total_submissions,
            mode="gauge+number+delta",
            title={"text": "Number of translations"},
            delta={"reference": DELTA_REFERENCE},
            gauge={
                "axis": {"range": [0, N_GOAL]},
                "steps": [
                    {"range": [0, total_submissions], "color": "gray"},
                ],
                # Green marker drawn at half of the goal.
                "threshold": {
                    "line": {"color": "green", "width": 4},
                    "thickness": 0.75,
                    "value": N_GOAL / 2,
                },
            },
        )
    )
    st.plotly_chart(gauge, use_container_width=True)

    # Share of rows per source language; the English slice is pulled out.
    pie = go.Figure(
        data=[
            go.Pie(
                labels=["English", "French"],
                values=[counts["n_eng"], counts["n_fr"]],
                pull=[0.2, 0],
            )
        ]
    )
    pie.update_traces(marker=dict(colors=["#46607b", "#FFFFFF"]))
    st.plotly_chart(pie, use_container_width=True)


def main():
    """Lay out the page: logo, banner, translator, then dataset statistics."""
    st.image("atlasia_white_wtext_nobg.png")
    counts = fetch_counts()
    _render_banner()

    # Disabled transliteration UI, kept for reference:
    # with st.container():
    #     st.markdown(
    #         """
    #         <div style='text-align: center;'>
    #         <h3>🔠Keyboard: Transliterate Letters from Latin to Arabic</h3
    #         >
    #         </div>
    #         """,
    #         unsafe_allow_html=True,
    #     )
    #     col1, col2 = st.columns(2)
    #     with col1:
    #         input_text = st.text_area(":grey[Enter a word/letter in English ⬇]", "")
    #         button = st.button("Transliterate")
    #     with col2:
    #         if button:
    #             with st.spinner('Transliterating...'):
    #                 translation = transliterate(input_text)
    #             st.text_area(
    #                 ":grey[Transliteration]",
    #                 translation,
    #             )
    # st.divider()

    _render_translator()
    st.divider()  # separator between the translator and the statistics
    _render_statistics(counts)


if __name__ == "__main__":
    main()