khaled06 committed on
Commit
2751939
1 Parent(s): bac1fe1

Create app.py

Files changed (1)
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
+ from transformers import AutoModel, AutoTokenizer, pipeline
+ import gradio as gr
+
+ # Load the GOT-OCR2 model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map='cuda', low_cpu_mem_usage=True).eval()
+
+ # Summarization model
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+
+ # Question-answering model (English)
+ pipe_qa_en = pipeline('question-answering', model="deepset/roberta-base-squad2")
+
+ # Question-answering model (Arabic)
+ pipe_qa_ar = pipeline("question-answering", model="gp-tar4/QA_FineTuned")
+
+ # Translation model (English to Arabic)
+ pipe_to_arabic = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ar")
+
+
+ def summarize(text):
+     # Summarize the text with BART
+     summary = summarizer(text, max_length=200, min_length=30, do_sample=False)
+     return summary[0]['summary_text']
+
+
+ def question_answering(question, context, language='english'):
+     QA_input = {'question': question, 'context': context}
+     if language == 'arabic':
+         return pipe_qa_ar(QA_input)['answer']
+     return pipe_qa_en(QA_input)['answer']
+
+
+ def to_arabic(text, max_length=512):
+     # Split long text into 512-character chunks so each piece fits the translation model's input limit
+     chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
+     translated_chunks = [pipe_to_arabic(chunk)[0]['translation_text'] for chunk in chunks]
+     return ' '.join(translated_chunks)
+
+
+ def process_image_and_text(image, text):
+     ocr_text = model.chat(tokenizer, image, ocr_type='ocr')
+     summarized_text = summarize(ocr_text)
+     return f"Input text: {text}\n\nSummarized OCR text: {summarized_text}"
+
+
+ def process_image_qa(language, image, question):
+     ocr_text = model.chat(tokenizer, image, ocr_type='ocr')
+     # The Radio component passes 'Arabic' or 'English', so compare case-insensitively
+     if language.lower() == 'arabic':
+         translated_text = to_arabic(ocr_text)
+         return question_answering(question, translated_text, language='arabic')
+     return question_answering(question, ocr_text)
+
+
+ # Gradio interfaces
+ summarization_interface = gr.Interface(
+     fn=process_image_and_text,
+     inputs=[gr.Image(type="filepath", label="Upload Image"), gr.Textbox(label="Input Text")],
+     outputs=gr.Textbox(label="Output Text"),
+     title="OCR & Summarization",
+     description="Upload an image; the extracted text is summarized and shown alongside your input text."
+ )
+
+ qa_interface = gr.Interface(
+     fn=process_image_qa,
+     inputs=[gr.Radio(['Arabic', 'English'], label='Select Language', value='Arabic'), gr.Image(type="filepath", label="Upload Image"), gr.Textbox(label="Input Question")],
+     outputs=gr.Textbox(label="Answer Text"),
+     title="OCR & Question Answering",
+     description="Upload an image and ask a question about its contents in English or Arabic."
+ )
+
+ # Combine both interfaces into a tabbed app
+ apps_interface = gr.TabbedInterface([summarization_interface, qa_interface], tab_names=["Summarization", "Question Answering"])
+
+ # Launch the app
+ apps_interface.launch()
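
For reference, a minimal client-side sketch of how the two tabs could be exercised programmatically with gradio_client. The base URL, the sample file name, and the endpoint names /predict and /predict_1 are assumptions, not part of this commit; check the running app's API page for the actual endpoint names.

from gradio_client import Client, handle_file

# Assumption: the app is running locally on Gradio's default port.
client = Client("http://127.0.0.1:7860/")

# OCR & Summarization tab (image + free text).
summary = client.predict(
    handle_file("sample_page.png"),   # hypothetical local image
    "Optional accompanying text",
    api_name="/predict",              # assumed auto-generated endpoint name
)
print(summary)

# OCR & Question Answering tab (language, image, question).
answer = client.predict(
    "English",
    handle_file("sample_page.png"),
    "What is the document about?",
    api_name="/predict_1",            # assumed auto-generated endpoint name
)
print(answer)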