SwordElucidator committed on
Commit 261f61b
1 Parent(s): 863b4e0

Create handler.py

Files changed (1)
  1. handler.py +52 -0
handler.py ADDED
@@ -0,0 +1,52 @@
+import base64
+from io import BytesIO
+from typing import Any, Dict
+
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM
+
+
+class EndpointHandler():
+    def __init__(self, path=""):
+        # Load the Florence-2 model and processor once when the endpoint starts.
+        model_id = 'microsoft/Florence-2-large'
+        model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval().cuda()
+        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+        self.model = model
+        self.processor = processor
+
+    def run_example(self, image, task_prompt, text_input=None):
+        # Build the prompt from the task token plus any optional text input.
+        if text_input is None:
+            prompt = task_prompt
+        else:
+            prompt = task_prompt + text_input
+        inputs = self.processor(text=prompt, images=image, return_tensors="pt")
+        generated_ids = self.model.generate(
+            input_ids=inputs["input_ids"].cuda(),
+            pixel_values=inputs["pixel_values"].cuda(),
+            max_new_tokens=1024,
+            early_stopping=False,
+            do_sample=False,
+            num_beams=3,
+        )
+        generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+        # Post-process the raw generation into the task-specific structured answer.
+        parsed_answer = self.processor.post_process_generation(
+            generated_text,
+            task=task_prompt,
+            image_size=(image.width, image.height)
+        )
+
+        return parsed_answer
+
+    def __call__(self, data: Any) -> Dict[str, Any]:
+        # The request payload carries the image as a base64-encoded string.
+        image = data.pop("image", None)
+        if image is None:
+            raise ValueError("Request payload must include a base64-encoded 'image' field.")
+        image = Image.open(BytesIO(base64.b64decode(image)))
+
+        caption = self.run_example(image, '<MORE_DETAILED_CAPTION>')
+        ocr = self.run_example(image, '<OCR>')
+        return {**caption, **ocr}
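
For context, a quick local smoke test of this handler could look like the sketch below. It is not part of the commit: the file name example.jpg is a placeholder, EndpointHandler is imported from the handler.py added above, and a CUDA-capable GPU is assumed since the model is moved to .cuda() at load time.

import base64

from handler import EndpointHandler  # the file added in this commit

# Hypothetical smoke test: encode an image and call the handler directly.
handler = EndpointHandler(path="")
with open("example.jpg", "rb") as f:  # placeholder image file
    payload = {"image": base64.b64encode(f.read()).decode("utf-8")}

# Returns a merged dict with the '<MORE_DETAILED_CAPTION>' and '<OCR>' results.
result = handler(payload)
print(result)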