Spaces:

IsmayilMasimov36
/

test

Sleeping

App Files Files Community

IsmayilMasimov36 committed on Jan 4

Commit

4f8c634

•

1 Parent(s): e54ed97

Create app.py

Browse files

Files changed (1) hide show

app.py +59 -0

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import streamlit as st
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+from pathlib import Path
+from pdfminer.high_level import extract_text
+def main():
+    st.title("PDF Translation")
+    st.write("Upload a PDF file and we will translate the text inside to German and French.")
+    # Upload the pdf
+    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+    if uploaded_file is not None:
+        # Extract text from pdf
+        documents = extract_text(uploaded_file)
+        tokenizer = T5Tokenizer.from_pretrained("t5-small")
+        model = T5ForConditionalGeneration.from_pretrained("t5-small")
+        # Define translation prefixes for each language
+        translation_prefixes = {
+            "german": "translate English to German: ",
+            "french": "translate English to French: "
+        }
+        # Generate translations for each language for each document
+        translations = {}
+        # Buttons to trigger translation
+        translate_german = st.button("Translate to German")
+        translate_french = st.button("Translate to French")
+        for language, prefix in translation_prefixes.items():
+            document_translations = []
+            for idx, document in enumerate(documents, 1):
+                text = prefix + document.text
+                input_ids = tokenizer(text, return_tensors="pt").input_ids
+                outputs = model.generate(input_ids=input_ids, max_length=50, num_beams=4, no_repeat_ngram_size=2)
+                translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                document_translations.append(translated_text)
+            translations[language] = document_translations
+        # Display the translations based on the button clicked
+        if translate_german:
+            display_translations(translations["german"], "German")
+        if translate_french:
+            display_translations(translations["french"], "French")
+def display_translations(translations, language):
+    st.write(f"\nLanguage: {language}")
+    for idx, translation in enumerate(translations, 1):
+        st.write(f"Page {idx}: {translation}")
+# Script entry point: call main() only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()