juanesbch committed on
Commit
dc74ed6
·
1 Parent(s): ae387e8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import transformers as trf
4
+
5
def _load_seq2seq(model_path):
    """Return the ``(tokenizer, model)`` pair published under *model_path*."""
    tokenizer = trf.AutoTokenizer.from_pretrained(model_path)
    model = trf.AutoModelForSeq2SeqLM.from_pretrained(model_path)
    return tokenizer, model


# Summarization checkpoint: first stage of the pipeline.
summarization_model_path = 'ieuniversity/News-Translator'
summarization_tokenizer, summarization_model = _load_seq2seq(summarization_model_path)

# Spanish-to-Quechua translation checkpoint: second stage of the pipeline.
translation_model_path = 'hackathon-pln-es/t5-small-finetuned-spanish-to-quechua'
translation_tokenizer, translation_model = _load_seq2seq(translation_model_path)
14
+
15
def _generate_text(tokenizer, model, text, max_input_length, max_output_length):
    """Run one tokenize -> generate -> decode pass of a seq2seq model on *text*.

    Args:
        tokenizer: Tokenizer matching *model*.
        model: Seq2seq model exposing ``generate``.
        text: Input string for the model.
        max_input_length: Token budget for the encoded input; longer input
            is truncated.
        max_output_length: ``max_length`` passed to ``generate``.

    Returns:
        The first generated sequence decoded to text, special tokens removed.
    """
    # Truncation is requested explicitly instead of relying on `max_length`
    # implying it, and no padding is applied: a single sequence gains nothing
    # from being padded out to the maximum length.
    encoded = tokenizer(
        text,
        truncation=True,
        max_length=max_input_length,
        return_attention_mask=True,
        return_tensors='pt',
    )
    output_ids = model.generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_output_length,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def summarize_and_translate(news_text):
    """Summarize a Spanish news article and translate the summary to Quechua.

    Args:
        news_text: Article text as entered in the UI; may be empty or ``None``.

    Returns:
        The translated summary, or ``""`` when there is no text to process.
    """
    # Blank input carries nothing to summarize; skip both model calls.
    if not news_text or not news_text.strip():
        return ""

    # Stage 1: condense the article (512 input tokens -> at most 128 output).
    summary_text = _generate_text(
        summarization_tokenizer,
        summarization_model,
        news_text,
        max_input_length=512,
        max_output_length=128,
    )

    # Stage 2: translate the summary to Quechua.
    return _generate_text(
        translation_tokenizer,
        translation_model,
        summary_text,
        max_input_length=512,
        max_output_length=512,
    )
37
+
38
# Define the input and output components for Gradio.
# The top-level `gr.Textbox` component is used for both sides; the
# `gr.inputs` / `gr.outputs` namespaces are deprecated.
input_interface = gr.Textbox(label="Input News Text (Spanish)")
output_interface = gr.Textbox(label="Output News Text (Quechua)")

# Create and launch the Gradio app.
iface = gr.Interface(
    fn=summarize_and_translate,
    inputs=input_interface,
    outputs=output_interface,
    title="Spanish-to-Quechua News Summarization and Translation",
)
iface.launch()