"""Streamlit app that translates the text of an uploaded PDF with T5."""

import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration
from pdfminer.high_level import extract_text

MODEL_NAME = "t5-small"

# Upper bound on tokens fed to the encoder; T5 checkpoints are configured
# for 512-token inputs, so longer prompts are truncated by the tokenizer.
MAX_INPUT_TOKENS = 512

# Upper bound on generated tokens (value kept from the original script).
MAX_OUTPUT_TOKENS = 150

# Task prefixes prepended to the source text, keyed by target language.
TRANSLATION_PREFIXES = {
    "german": "translate English to German: ",
    "french": "translate English to French: ",
}


@st.cache_resource
def _load_model():
    """Load the tokenizer and model once and reuse them across reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, the weights would be reloaded each time a button is pressed.

    Returns:
        A ``(tokenizer, model)`` tuple for ``MODEL_NAME``.
    """
    tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
    model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
    return tokenizer, model


def _translate(text: str, language: str) -> str:
    """Translate *text* into *language* using the cached T5 model.

    The prompt is truncated to ``MAX_INPUT_TOKENS`` and generation stops
    after ``MAX_OUTPUT_TOKENS``, so a long document is only translated up
    to those limits.

    Args:
        text: Source text extracted from the PDF.
        language: A key of ``TRANSLATION_PREFIXES``.

    Returns:
        The decoded translation with special tokens removed.

    Raises:
        KeyError: If *language* is not a key of ``TRANSLATION_PREFIXES``.
    """
    tokenizer, model = _load_model()
    prompt = TRANSLATION_PREFIXES[language] + text
    input_ids = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_TOKENS,
    ).input_ids
    outputs = model.generate(
        input_ids=input_ids,
        max_length=MAX_OUTPUT_TOKENS,
        num_beams=4,
        no_repeat_ngram_size=2,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def main() -> None:
    """Render the upload widget and translate the PDF on request."""
    st.title("PDF Translation")

    # Upload the pdf
    uploaded_file = st.file_uploader(
        "Upload a PDF file and we will translate the text inside to German and French.",
        type=["pdf"],
    )
    if uploaded_file is None:
        return

    # Extract text from pdf
    text = extract_text(uploaded_file)
    if not text or not text.strip():
        # Nothing to translate (for example, a PDF without a text layer).
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # Buttons to trigger translation
    translate_german = st.button("Translate to German")
    translate_french = st.button("Translate to French")

    # Run the model only for the language that was actually requested;
    # generating every translation on each rerun is needlessly expensive.
    if translate_german:
        display_translation(_translate(text, "german"), "German")
    if translate_french:
        display_translation(_translate(text, "french"), "French")


def display_translation(translation: str, language: str) -> None:
    """Write the language label and its translation to the page.

    Args:
        translation: Translated text to show.
        language: Human-readable name of the target language.
    """
    st.write(f"\nLanguage: {language}")
    st.write(f"Translation: {translation}")


if __name__ == "__main__":
    main()