Prabhjotschugh committed on
Commit
906c0e0
•
1 Parent(s): a216b49

Add necessary files for Tesseract OCR deployment

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +84 -0
  3. packages.txt +5 -0
  4. requirements.txt +3 -0
  5. setup.sh +10 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.sh text eol=lf
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import pytesseract
4
+
5
+
6
+ pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
7
+
8
def extract_text(image):
    """Run Tesseract OCR on an image and return the recognized text.

    Args:
        image: Image handed straight to ``pytesseract.image_to_string``.
            ``None`` means no image was supplied.

    Returns:
        The text recognized with the ``hin+eng+spa+fra+pan`` language
        models, or an empty string when ``image`` is ``None``.
    """
    # Nothing to read: return empty text rather than letting pytesseract
    # raise on a None image.
    if image is None:
        return ""
    return pytesseract.image_to_string(image, lang='hin+eng+spa+fra+pan')
11
+
12
def keyword_search(text, keyword):
    """Label each whitespace-separated word of ``text`` as matching ``keyword`` or not.

    Matching is caseless (``str.casefold``) and ignores punctuation attached
    to either end of a word, so ``"Hello,"`` matches the keyword ``"hello"``.
    Whitespace around the keyword is ignored.

    Args:
        text: Text to search. ``None`` is treated as empty text when a
            keyword is given.
        keyword: Word to look for.

    Returns:
        ``text`` unchanged when ``keyword`` is empty or ``None``; otherwise a
        list of ``(word, matched)`` tuples, one per word, in order, where
        ``matched`` is a bool.
    """
    if not keyword:
        return text

    # Local import keeps this block self-contained.
    import unicodedata

    def _normalize(token):
        # Trim Unicode punctuation (general category P*) from both ends only;
        # combining marks inside a word are left intact.
        start, end = 0, len(token)
        while start < end and unicodedata.category(token[start]).startswith("P"):
            start += 1
        while end > start and unicodedata.category(token[end - 1]).startswith("P"):
            end -= 1
        # A token made only of punctuation is compared as-is.
        core = token[start:end] or token
        return core.casefold()

    needle = _normalize(keyword.strip())
    return [(word, _normalize(word) == needle) for word in (text or "").split()]
18
+
19
def process_image_with_search(image):
    """OCR the given image and return the values for the extract-button outputs.

    Args:
        image: Image forwarded to ``extract_text``.

    Returns:
        A 3-tuple, in the order the click handler lists its outputs: the
        extracted text, an update with ``visible=True``, and an update with
        ``value=""``.
    """
    ocr_text = extract_text(image)
    reveal = gr.update(visible=True)
    blank = gr.update(value="")
    return ocr_text, reveal, blank
22
+
23
def search_keyword(text, keyword):
    """Build the component update that displays keyword matches in ``text``.

    Args:
        text: Text to search.
        keyword: Keyword to look for; may be empty.

    Returns:
        A ``gr.update`` that makes the target component visible and sets its
        value to a list of ``(token, label)`` pairs.
    """
    highlighted = keyword_search(text, keyword)
    # With no keyword, keyword_search hands back the raw text. HighlightedText
    # needs (token, label) pairs, so show the text as one unlabeled token.
    if highlighted is None or isinstance(highlighted, str):
        highlighted = [(highlighted or "", None)]
    return gr.update(value=highlighted, visible=True)
26
+
27
def clear_outputs():
    """Return the reset values used by the clear action.

    Returns:
        A 4-tuple: an empty string, an update with ``value=None`` and
        ``visible=False``, an update with ``visible=False``, and an update
        with ``value=""``.
    """
    empty_text = ""
    hide_and_reset = gr.update(value=None, visible=False)
    hide_only = gr.update(visible=False)
    blank_value = gr.update(value="")
    return empty_text, hide_and_reset, hide_only, blank_value
29
+
30
def main():
    """Build the TextVision Gradio interface, wire its events, and launch it.

    Layout: a left column with the image upload, the Extract/Clear buttons,
    a keyword-search checkbox and a keyword group (both hidden at start);
    a right column with the extracted text and the highlighted result
    (hidden at start).
    """
    theme = gr.themes.Default().set(
        button_primary_background_fill="#FFA500",
        button_primary_background_fill_hover="#FF8C00",
        button_primary_text_color="white",
    )

    with gr.Blocks(theme=theme, css=".highlight { background-color: #FFA500; }") as interface:
        gr.Markdown("# TextVision 🌟")
        gr.Markdown("Upload an image, extract text, and search for keywords.")

        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(type="pil", label="Upload Image")
                with gr.Row():
                    extract_text_button = gr.Button("Extract Text", variant="primary")
                    clear_button = gr.Button("Clear", variant="secondary")

                search_keyword_prompt = gr.Checkbox(label="Do you want to search for a keyword?", visible=False)

                with gr.Group(visible=False) as search_group:
                    keyword_input = gr.Textbox(label="Enter Keyword")
                    search_button = gr.Button("Search Keyword", variant="primary")

            with gr.Column(scale=2):
                extracted_text_output = gr.Textbox(label="Extracted Text", interactive=False, lines=10)
                keyword_highlighted_output = gr.HighlightedText(label="Highlighted Text", visible=False)

        # process_image_with_search returns (text, visible=True update,
        # value="" update). A Group has no value, so the third update goes to
        # the keyword textbox, clearing any stale keyword on a new extraction.
        extract_text_button.click(
            process_image_with_search,
            inputs=image_input,
            outputs=[extracted_text_output, search_keyword_prompt, keyword_input],
        )

        # The checkbox toggles the whole keyword-search area.
        search_keyword_prompt.change(
            lambda val: [gr.update(visible=val), gr.update(visible=val), gr.update(visible=val)],
            inputs=search_keyword_prompt,
            outputs=[keyword_input, search_button, search_group],
        )

        # clear_outputs returns four values, so four outputs are listed; the
        # last one (value="") resets the keyword textbox.
        clear_button.click(
            clear_outputs,
            outputs=[extracted_text_output, keyword_highlighted_output, search_group, keyword_input],
        )

        search_button.click(
            search_keyword,
            inputs=[extracted_text_output, keyword_input],
            outputs=keyword_highlighted_output,
        )

    interface.launch()
82
+
83
+ if __name__ == "__main__":
84
+ main()
packages.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ tesseract-ocr
2
+ tesseract-ocr-hin
3
+ tesseract-ocr-spa
4
+ tesseract-ocr-fra
5
+ tesseract-ocr-pan
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ Pillow
3
+ pytesseract
setup.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Installs the system packages listed in packages.txt and the Python
# packages listed in requirements.txt. Both files are read from the
# current working directory.

# Stop at the first failing command, unset variable, or failed pipeline
# stage, so a broken apt/pip step is not silently ignored.
set -euo pipefail

# Update package lists
apt-get update

# Install packages from packages.txt
xargs -a packages.txt apt-get install -y

# Install Python packages
pip install -r requirements.txt