sitammeur committed on
Commit
dbfc97a
·
verified ·
1 Parent(s): 36a73d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -38
app.py CHANGED
@@ -1,38 +1,42 @@
1
- # Importing the requirements
2
- import gradio as gr
3
- from src.task import ocr_task
4
-
5
-
6
- # Image input for the interface
7
- image = gr.Image(type="pil", label="Image")
8
-
9
- # Output for the interface (image and text)
10
- ocr_text_output = gr.Textbox(label="OCR Text")
11
- ocr_image_output = gr.Image(type="pil", label="Output Image")
12
-
13
- # Examples for the interface (image paths)
14
- examples = [
15
- ["images/ocr_image_1jpg"],
16
- ["images/ocr_image_2.jpg"],
17
- ["images/ocr_image_3.jpg"],
18
- ]
19
-
20
- # Title, description, and article for the interface
21
- title = "OCR Text Extraction and Visualization"
22
- description = "Gradio Demo for the Florence-2-large Vision Language Model. This application performs Optical Character Recognition (OCR) on images and provides both extracted text and visualized bounding boxes around detected text regions. To use it, simply upload your image and click 'Submit'. The application will return the detected text and an image with bounding boxes drawn around the detected text regions. This is useful for various OCR-related tasks including document digitization, text extraction, and visual verification of detected text regions."
23
- article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2311.06242' target='_blank'>Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks</a> | <a href='https://huggingface.co/microsoft/Florence-2-large-ft' target='_blank'>Model Page</a></p>"
24
-
25
-
26
- # Launch the interface
27
- interface = gr.Interface(
28
- fn=ocr_task,
29
- inputs=[image],
30
- outputs=[ocr_image_output, ocr_text_output],
31
- examples=examples,
32
- title=title,
33
- description=description,
34
- article=article,
35
- theme="soft",
36
- allow_flagging="never",
37
- )
38
- interface.launch(debug=False)
 
 
 
 
 
1
+ # Importing the requirements
2
+ import subprocess
3
+ import gradio as gr
4
+ from src.task import ocr_task
5
+
6
+
7
+ # Install the required dependencies
8
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
9
+
10
+ # Image input for the interface
11
+ image = gr.Image(type="pil", label="Image")
12
+
13
+ # Output for the interface (image and text)
14
+ ocr_text_output = gr.Textbox(label="OCR Text")
15
+ ocr_image_output = gr.Image(type="pil", label="Output Image")
16
+
17
+ # Examples for the interface (image paths)
18
+ examples = [
19
+ ["images/ocr_image_1.jpg"],
20
+ ["images/ocr_image_2.jpg"],
21
+ ["images/ocr_image_3.jpg"],
22
+ ]
23
+
24
+ # Title, description, and article for the interface
25
+ title = "OCR Text Extraction and Visualization"
26
+ description = "Gradio Demo for the Florence-2-large Vision Language Model. This application performs Optical Character Recognition (OCR) on images and provides both extracted text and visualized bounding boxes around detected text regions. To use it, simply upload your image and click 'Submit'. The application will return the detected text and an image with bounding boxes drawn around the detected text regions. This is useful for various OCR-related tasks including document digitization, text extraction, and visual verification of detected text regions."
27
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2311.06242' target='_blank'>Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks</a> | <a href='https://huggingface.co/microsoft/Florence-2-large-ft' target='_blank'>Model Page</a></p>"
28
+
29
+
30
+ # Launch the interface
31
+ interface = gr.Interface(
32
+ fn=ocr_task,
33
+ inputs=[image],
34
+ outputs=[ocr_image_output, ocr_text_output],
35
+ examples=examples,
36
+ title=title,
37
+ description=description,
38
+ article=article,
39
+ theme="soft",
40
+ allow_flagging="never",
41
+ )
42
+ interface.launch(debug=False)