Spaces:

darshankr
/

trans-en-indic

Runtime error

App Files Community

darshankr committed on Oct 27, 2024

Commit

0b7c166

verified ·

1 Parent(s): df8f230

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -73

app.py CHANGED Viewed

@@ -1,71 +1,46 @@
 # app.py
 import streamlit as st
 import torch
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from IndicTransToolkit import IndicProcessor
-from typing import List
-import sys
-from starlette.applications import Starlette
-from starlette.routing import Mount
-from starlette.staticfiles import StaticFiles
-import nest_asyncio
-from api import app
-# Enable nested event loops
-nest_asyncio.apply()
-# Initialize models and processors (lazy loading)
-@st.cache_resource
-def load_models():
-    model = AutoModelForSeq2SeqLM.from_pretrained(
-        "ai4bharat/indictrans2-en-indic-1B",
-        trust_remote_code=True
-    )
-    tokenizer = AutoTokenizer.from_pretrained(
-        "ai4bharat/indictrans2-en-indic-1B",
-        trust_remote_code=True
-    )
-    ip = IndicProcessor(inference=True)
-    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-    model = model.to(DEVICE)
-    return model, tokenizer, ip, DEVICE
-# Global variables for models
-model, tokenizer, ip, DEVICE = load_models()
 def translate_text(sentences: List[str], target_lang: str):
     try:
         src_lang = "eng_Latn"
-        batch = ip.preprocess_batch(
-            sentences,
-            src_lang=src_lang,
-            tgt_lang=target_lang
-        )
-        inputs = tokenizer(
-            batch,
-            truncation=True,
-            padding="longest",
-            return_tensors="pt",
-            return_attention_mask=True
-        ).to(DEVICE)
         with torch.no_grad():
             generated_tokens = model.generate(
-                **inputs,
                 use_cache=True,
                 min_length=0,
                 max_length=256,
                 num_beams=5,
                 num_return_sequences=1
             )
         with tokenizer.as_target_tokenizer():
             generated_tokens = tokenizer.batch_decode(
                 generated_tokens.detach().cpu().tolist(),
                 skip_special_tokens=True,
                 clean_up_tokenization_spaces=True
             )
         translations = ip.postprocess_batch(generated_tokens, lang=target_lang)
         return {
             "translations": translations,
@@ -75,12 +50,13 @@ def translate_text(sentences: List[str], target_lang: str):
     except Exception as e:
         raise Exception(f"Translation failed: {str(e)}")
-def streamlit_app():
     st.title("Indic Language Translator")
     # Input text
     text_input = st.text_area("Enter text to translate:", "Hello, how are you?")
     # Language selection
     target_languages = {
         "Hindi": "hin_Deva",
@@ -95,17 +71,13 @@ def streamlit_app():
         "Odia": "ori_Orya"
     }
-    target_lang = st.selectbox(
-        "Select target language:",
-        options=list(target_languages.keys())
-    )
     if st.button("Translate"):
         try:
-            result = translate_text(
-                sentences=[text_input],
-                target_lang=target_languages[target_lang]
-            )
             st.success("Translation:")
             st.write(result["translations"][0])
         except Exception as e:
@@ -116,9 +88,8 @@ def streamlit_app():
     st.header("API Documentation")
     st.markdown("""
     To use the translation API, send POST requests to:
-    ```
-    https://YOUR-SPACE-NAME.hf.space/api/translate
-    ```
     Request body format:
     ```json
     {
@@ -126,21 +97,19 @@ def streamlit_app():
         "target_lang": "hin_Deva"
     }
     ```
-    """)
-    st.markdown("Available target languages:")
-    for lang, code in target_languages.items():
-        st.markdown(f"- {lang}: `{code}`")
-def create_app():
-    routes = [
-        Mount("/api", app),
-        Mount("/", StaticFiles(directory="static", html=True), name="static"),
-    ]
-    return Starlette(routes=routes)
 if __name__ == "__main__":
-    if "streamlit" in sys.argv[0]:
-        streamlit_app()
-    else:
-        import uvicorn
-        uvicorn.run(create_app(), host="0.0.0.0", port=7860)

 # app.py
 import streamlit as st
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List
 import torch
+import asyncio
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from IndicTransToolkit import IndicProcessor
+import requests
+import json
+# Initialize models and processors
+model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True)
+ip = IndicProcessor(inference=True)
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(DEVICE)
 def translate_text(sentences: List[str], target_lang: str):
     try:
         src_lang = "eng_Latn"
+        batch = ip.preprocess_batch(sentences, src_lang=src_lang, tgt_lang=target_lang)
+        inputs = tokenizer(batch, truncation=True, padding="longest", return_tensors="pt", return_attention_mask=True).to(DEVICE)
         with torch.no_grad():
             generated_tokens = model.generate(
+                inputs,
                 use_cache=True,
                 min_length=0,
                 max_length=256,
                 num_beams=5,
                 num_return_sequences=1
             )
         with tokenizer.as_target_tokenizer():
             generated_tokens = tokenizer.batch_decode(
                 generated_tokens.detach().cpu().tolist(),
                 skip_special_tokens=True,
                 clean_up_tokenization_spaces=True
             )
         translations = ip.postprocess_batch(generated_tokens, lang=target_lang)
         return {
             "translations": translations,
     except Exception as e:
         raise Exception(f"Translation failed: {str(e)}")
+# Streamlit interface
+def main():
     st.title("Indic Language Translator")
     # Input text
     text_input = st.text_area("Enter text to translate:", "Hello, how are you?")
     # Language selection
     target_languages = {
         "Hindi": "hin_Deva",
         "Odia": "ori_Orya"
     }
+    target_lang = st.selectbox("Select target language:", options=list(target_languages.keys()))
     if st.button("Translate"):
         try:
+            result = translate_text(sentences=[text_input], target_lang=target_languages[target_lang])
+            # Display result
             st.success("Translation:")
             st.write(result["translations"][0])
         except Exception as e:
     st.header("API Documentation")
     st.markdown("""
     To use the translation API, send POST requests to:
+    https://USERNAME-SPACE_NAME.hf.space/translate
     Request body format:
     ```json
     {
         "target_lang": "hin_Deva"
     }
     ```
+    Available target languages:
+    - Hindi: hin_Deva
+    - Bengali: ben_Beng
+    - Tamil: tam_Taml
+    - Telugu: tel_Telu
+    - Marathi: mar_Deva
+    - Gujarati: guj_Gujr
+    - Kannada: kan_Knda
+    - Malayalam: mal_Mlym
+    - Punjabi: pan_Guru
+    - Odia: ori_Orya
+    """)
 if __name__ == "__main__":
+    main()