"""Streamlit landing page for the Atlasia Darija translation project.

The page shows a call to contribute, a widget that translates English text to
Darija with the Terjman model, and two charts summarising the
``atlasia/darija-translation`` dataset.
"""

from datasets import load_dataset
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from transformers import pipeline

# Hugging Face identifiers and local assets used by the page.
DATASET_NAME = "atlasia/darija-translation"
TRANSLATION_MODEL = "atlasia/Terjman-Large"
TRANSLITERATION_MODEL = "atlasia/Transliteration-Moroccan-Darija"
LOGO_PATH = "atlasia_white_wtext_nobg.png"

# Upper bound of the gauge axis; the green threshold line sits at half of it.
N_GOAL = 100000
# Baseline the gauge's delta indicator is measured against.
# NOTE(review): the origin of this figure is not visible here -- confirm.
DELTA_REFERENCE = 42000

# Markdown snippets are kept at module level (column 0) so that re-indenting
# the code can never add leading whitespace, which markdown would render as a
# code block.
HEADER_MD = """

Contribute now to help build a better Darija dataset for all Moroccans Contribute here: https://atlasia.ma

"""

TRANSLATOR_TITLE_MD = """

💬Terjman: Translate to Darija

"""

TRANSLATOR_CAPTION_MD = """

This model has been developed thanks to your contributions. While it's not perfect yet, your continued input is key for making it better.

"""

STATISTICS_TITLE_MD = """

📊 Data statistics

"""


@st.cache_data
def fetch_counts() -> dict:
    """Return row counts for the train split of the translation dataset.

    Returns:
        A dict with ``"n_eng"`` and ``"n_fr"`` (number of rows whose ``en`` /
        ``fr`` column is not null) and ``"n"`` (total number of rows).

    The result is memoised by ``st.cache_data``, so the dataset is only loaded
    on the first call.
    """
    frame = pd.DataFrame(load_dataset(DATASET_NAME, split="train"))
    return {
        # Series.count() counts non-null entries, i.e. len(series.dropna()).
        "n_eng": int(frame["en"].count()),
        "n_fr": int(frame["fr"].count()),
        "n": len(frame),
    }


@st.cache_resource(show_spinner=False)
def _load_pipeline(model_name: str):
    """Build the text2text-generation pipeline for ``model_name`` once.

    ``st.cache_resource`` keeps the pipeline object alive across reruns, so
    the model is not re-instantiated on every button click.
    """
    return pipeline("text2text-generation", model=model_name)


def terjman(input_text: str) -> str:
    """Translate ``input_text`` with the Terjman model.

    Args:
        input_text: Text to translate (the UI asks for an English sentence).

    Returns:
        The ``generated_text`` of the first pipeline result, generated with
        ``max_length=512``.
    """
    result = _load_pipeline(TRANSLATION_MODEL)(input_text, max_length=512)
    return result[0]["generated_text"]


def transliterate(input_text: str) -> str:
    """Run ``input_text`` through the Moroccan Darija transliteration model.

    Args:
        input_text: Text to transliterate.

    Returns:
        The ``generated_text`` of the first pipeline result, generated with
        ``max_length=50``.
    """
    result = _load_pipeline(TRANSLITERATION_MODEL)(input_text, max_length=50)
    return result[0]["generated_text"]


def _render_translator() -> None:
    """Draw the translation widget: input and button left, output right."""
    with st.container():
        st.markdown(TRANSLATOR_TITLE_MD, unsafe_allow_html=True)
        st.caption(TRANSLATOR_CAPTION_MD, unsafe_allow_html=True)

        col1, col2 = st.columns(2)
        with col1:
            input_text = st.text_area(":grey[Enter a sentence in English ⬇]", "")
            button = st.button("Translate")
        with col2:
            if button:
                if not input_text.strip():
                    # Do not send an empty prompt to the model.
                    st.warning("Please enter a sentence to translate.")
                else:
                    with st.spinner('Translating...'):
                        translation = terjman(input_text)
                    st.text_area(
                        ":grey[Translation in Darija]",
                        translation,
                    )


def _render_statistics(counts: dict) -> None:
    """Draw the progress gauge and the English/French pie chart.

    Args:
        counts: Mapping with the ``"n"``, ``"n_eng"`` and ``"n_fr"`` keys as
            returned by :func:`fetch_counts`.
    """
    total_submissions = counts["n"]

    st.markdown(STATISTICS_TITLE_MD, unsafe_allow_html=True)

    # Progress gauge: total number of rows against the goal.
    gauge = go.Figure(
        go.Indicator(
            domain={"x": [0, 1], "y": [0, 1]},
            value=total_submissions,
            mode="gauge+number+delta",
            title={"text": "Number of translations"},
            delta={"reference": DELTA_REFERENCE},
            gauge={
                "axis": {"range": [0, N_GOAL]},
                "steps": [
                    {"range": [0, total_submissions], "color": "gray"},
                ],
                "threshold": {
                    "line": {"color": "green", "width": 4},
                    "thickness": 0.75,
                    "value": N_GOAL / 2,
                },
            },
        )
    )
    st.plotly_chart(gauge, use_container_width=True)

    # Share of rows that have an English vs. a French source sentence; the
    # English slice is pulled out of the pie.
    labels = ["English", "French"]
    values = [counts["n_eng"], counts["n_fr"]]
    pie = go.Figure(data=[go.Pie(labels=labels, values=values, pull=[0.2, 0])])
    pie.update_traces(marker=dict(colors=["#46607b", "#FFFFFF"]))
    st.plotly_chart(pie, use_container_width=True)


def main() -> None:
    """Render the whole page from top to bottom."""
    st.image(LOGO_PATH)
    counts = fetch_counts()

    # Empty text element kept in front of the call-to-action, as in the
    # original layout.
    st.text("")
    st.markdown(HEADER_MD, unsafe_allow_html=True)
    st.divider()

    # Disabled: transliteration keyboard (Latin -> Arabic letters).
    # with st.container() as c:
    #     # add a block where users can input text and get a translation
    #     st.markdown(
    #         """
    #
    # 🔠Keyboard: Transliterate Letters from Latin to Arabic
    #
    #         """,
    #         unsafe_allow_html=True,
    #     )
    #     col1, col2 = st.columns(2)
    #     with col1:
    #         input_text = st.text_area(":grey[Enter a word/letter in English ⬇]", "")
    #         button = st.button("Transliterate")
    #     with col2:
    #         if button:
    #             with st.spinner('Transliterating...'):
    #                 translation = transliterate(input_text)
    #             st.text_area(
    #                 ":grey[Transliteration]",
    #                 translation,
    #             )
    # st.divider()

    _render_translator()

    # add a separator
    st.divider()

    _render_statistics(counts)


if __name__ == "__main__":
    main()