Tonic committed on
Commit
aebaa46
•
1 Parent(s): 353f4f2

remove loadimg

Browse files
Files changed (3) hide show
  1. app.py +33 -25
  2. requirements.txt +2 -1
  3. results/.example +0 -0
app.py CHANGED
@@ -6,6 +6,7 @@ import base64
6
  import spaces
7
  from loadimg import load_img
8
  from PIL import Image
 
9
 
10
  title = """# 🙋🏻‍♂️Welcome to Tonic's🫴🏻📸GOT-OCR"""
11
  description = """"
@@ -46,38 +47,45 @@ model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
46
  model = model.eval().cuda()
47
  model.config.pad_token_id = tokenizer.eos_token_id
48
 
 
 
 
 
 
 
 
 
 
49
  @spaces.GPU
50
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None, render=False):
51
- if isinstance(image, str):
52
- img_path = image
53
- img = Image.open(img_path)
54
- else:
55
- img = Image.open(image)
56
  img_path = "/tmp/temp_image.png"
57
  img.save(img_path)
58
 
59
- if task == "Plain Text OCR":
60
- res = model.chat(tokenizer, img, ocr_type='ocr')
61
- elif task == "Format Text OCR":
62
- res = model.chat(tokenizer, img, ocr_type='format')
63
- elif task == "Fine-grained OCR (Box)":
64
- res = model.chat(tokenizer, img, ocr_type=ocr_type, ocr_box=ocr_box)
65
- elif task == "Fine-grained OCR (Color)":
66
- res = model.chat(tokenizer, img, ocr_type=ocr_type, ocr_color=ocr_color)
67
- elif task == "Multi-crop OCR":
68
- res = model.chat_crop(tokenizer, image_file=img_path)
69
- elif task == "Render Formatted OCR":
70
- res = model.chat(tokenizer, img, ocr_type='format', render=True, save_render_file='./results/demo.html')
71
- with open('./results/demo.html', 'r') as f:
72
- html_content = f.read()
73
- return res, html_content
74
-
75
- # Clean up
76
- if img_path.startswith("/tmp/"):
77
  os.remove(img_path)
 
 
 
 
78
 
79
- return res, None
80
-
81
  def update_inputs(task):
82
  if task == "Plain Text OCR" or task == "Format Text OCR" or task == "Multi-crop OCR":
83
  return [gr.update(visible=False)] * 4
 
6
  import spaces
7
  from loadimg import load_img
8
  from PIL import Image
9
+ import numpy as np
10
 
11
  title = """# 🙋🏻‍♂️Welcome to Tonic's🫴🏻📸GOT-OCR"""
12
  description = """"
 
47
  model = model.eval().cuda()
48
  model.config.pad_token_id = tokenizer.eos_token_id
49
 
50
+ def load_image(image_file):
51
+ if isinstance(image_file, str):
52
+ if image_file.startswith('http') or image_file.startswith('https'):
53
+ return Image.open(requests.get(image_file, stream=True).raw).convert('RGB')
54
+ else:
55
+ return Image.open(image_file).convert('RGB')
56
+ else:
57
+ return image_file.convert('RGB')
58
+
59
  @spaces.GPU
60
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None, render=False):
61
+ try:
62
+ img = load_image(image)
 
 
 
63
  img_path = "/tmp/temp_image.png"
64
  img.save(img_path)
65
 
66
+ if task == "Plain Text OCR":
67
+ res = model.chat(tokenizer, img_path, ocr_type='ocr')
68
+ elif task == "Format Text OCR":
69
+ res = model.chat(tokenizer, img_path, ocr_type='format')
70
+ elif task == "Fine-grained OCR (Box)":
71
+ res = model.chat(tokenizer, img_path, ocr_type=ocr_type, ocr_box=ocr_box)
72
+ elif task == "Fine-grained OCR (Color)":
73
+ res = model.chat(tokenizer, img_path, ocr_type=ocr_type, ocr_color=ocr_color)
74
+ elif task == "Multi-crop OCR":
75
+ res = model.chat_crop(tokenizer, image_file=img_path)
76
+ elif task == "Render Formatted OCR":
77
+ res = model.chat(tokenizer, img_path, ocr_type='format', render=True, save_render_file='./results/demo.html')
78
+ with open('./results/demo.html', 'r') as f:
79
+ html_content = f.read()
80
+ return res, html_content
81
+
82
+ # Clean up
 
83
  os.remove(img_path)
84
+
85
+ return res, None
86
+ except Exception as e:
87
+ return str(e), None
88
 
 
 
89
  def update_inputs(task):
90
  if task == "Plain Text OCR" or task == "Format Text OCR" or task == "Multi-crop OCR":
91
  return [gr.update(visible=False)] * 4
requirements.txt CHANGED
@@ -8,4 +8,5 @@
8
  cairosvg
9
  accelerate
10
  numpy
11
- loadimg
 
 
8
  cairosvg
9
  accelerate
10
  numpy
11
+ loadimg
12
+ pillow
results/.example ADDED
File without changes