IsmayilMasimov36 committed on
Commit
4f8c634
1 Parent(s): e54ed97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
3
+ from pathlib import Path
4
+ from pdfminer.high_level import extract_text
5
+
6
def main():
    """Run the Streamlit PDF-translation app.

    Renders a PDF uploader plus one button per target language. When a
    button is clicked, the uploaded PDF's text is extracted, split into
    pages, and every page is translated with the ``t5-small`` model into
    the requested language. The result is rendered through
    ``display_translations``.
    """
    st.title("PDF Translation")
    st.write("Upload a PDF file and we will translate the text inside to German and French.")

    # Upload the pdf
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    # Per language: (T5 task prefix, label shown above the translations).
    languages = {
        "german": ("translate English to German: ", "German"),
        "french": ("translate English to French: ", "French"),
    }

    # Buttons to trigger translation. Both are always rendered (German first);
    # Streamlit reruns the script on a click and reports True for that button.
    clicked = {
        "german": st.button("Translate to German"),
        "french": st.button("Translate to French"),
    }
    selected = [language for language, pressed in clicked.items() if pressed]
    if not selected:
        # Nothing requested on this rerun: skip extraction and model loading.
        return

    # extract_text returns ONE string for the whole document, so it has to be
    # split into pages before translating page by page.
    pages = _split_pages(extract_text(uploaded_file))
    if not pages:
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")

    for language in selected:
        prefix, label = languages[language]
        translations = [_translate(tokenizer, model, prefix + page) for page in pages]
        display_translations(translations, label)


def _split_pages(text):
    """Split extracted PDF text into stripped, non-empty page strings.

    pdfminer's text converter writes a form feed after every page, so the
    form feed is used as the page delimiter here.
    """
    return [page.strip() for page in text.split("\f") if page.strip()]


def _translate(tokenizer, model, text):
    """Translate one prefixed text with T5 and return the decoded string."""
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    # NOTE(review): max_length=50 caps the generated output at 50 tokens, so
    # long pages are translated only partially -- confirm this is intended.
    outputs = model.generate(
        input_ids=input_ids,
        max_length=50,
        num_beams=4,
        no_repeat_ngram_size=2,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
50
+
51
+
52
def display_translations(translations, language):
    """Write a language heading followed by one numbered line per translation.

    Args:
        translations: Iterable of translated texts, in page order.
        language: Label shown in the heading line.
    """
    heading = f"\nLanguage: {language}"
    st.write(heading)
    # Pages are numbered starting from 1.
    for page_number, page_text in enumerate(translations, start=1):
        st.write(f"Page {page_number}: {page_text}")
56
+
57
+
58
# Script entry point: start the app only when this file is executed directly.
if __name__ == "__main__":
    main()