sitammeur committed on
Commit
8c1a99e
·
verified ·
1 Parent(s): d5413f2

Update src/model.py

Browse files
Files changed (1) hide show
  1. src/model.py +52 -52
src/model.py CHANGED
@@ -1,52 +1,52 @@
1
- # Importing necessary libraries
2
- import spaces
3
- from transformers import AutoProcessor, AutoModelForCausalLM
4
-
5
-
6
- # Load model and processor from Hugging Face
7
- model_id = "microsoft/Florence-2-large-ft"
8
- model = (
9
- AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval().cuda()
10
- )
11
- processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
12
-
13
-
14
- @spaces.GPU(duration=120)
15
- def run_example(task_prompt, image, text_input=None):
16
- """
17
- Runs an example using the given task prompt and image.
18
-
19
- Args:
20
- task_prompt (str): The task prompt for the example.
21
- image (PIL.Image.Image): The image to be processed.
22
- text_input (str, optional): Additional text input to be appended to the task prompt. Defaults to None.
23
-
24
- Returns:
25
- str: The parsed answer generated by the model.
26
- """
27
-
28
- # If there is no text input, use the task prompt as the prompt
29
- if text_input is None:
30
- prompt = task_prompt
31
- else:
32
- prompt = task_prompt + text_input
33
-
34
- # Process the image and text input
35
- inputs = processor(text=prompt, images=image, return_tensors="pt")
36
-
37
- # Generate the answer using the model
38
- generated_ids = model.generate(
39
- input_ids=inputs["input_ids"].cuda(),
40
- pixel_values=inputs["pixel_values"].cuda(),
41
- max_new_tokens=1024,
42
- early_stopping=False,
43
- do_sample=False,
44
- num_beams=3,
45
- )
46
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
47
- parsed_answer = processor.post_process_generation(
48
- generated_text, task=task_prompt, image_size=(image.width, image.height)
49
- )
50
-
51
- # Return the parsed answer
52
- return parsed_answer
 
1
+ # Importing necessary libraries
2
+ import spaces
3
+ from transformers import AutoProcessor, AutoModelForCausalLM
4
+
5
+
6
+ # Load model and processor from Hugging Face
7
+ model_id = "microsoft/Florence-2-large-ft"
8
+ model = (
9
+ AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
10
+ )
11
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
12
+
13
+
14
+ @spaces.GPU(duration=120)
15
+ def run_example(task_prompt, image, text_input=None):
16
+ """
17
+ Runs an example using the given task prompt and image.
18
+
19
+ Args:
20
+ task_prompt (str): The task prompt for the example.
21
+ image (PIL.Image.Image): The image to be processed.
22
+ text_input (str, optional): Additional text input to be appended to the task prompt. Defaults to None.
23
+
24
+ Returns:
25
+ dict: The parsed answer generated by the model, keyed by the task prompt.
26
+ """
27
+
28
+ # If there is no text input, use the task prompt as the prompt
29
+ if text_input is None:
30
+ prompt = task_prompt
31
+ else:
32
+ prompt = task_prompt + text_input
33
+
34
+ # Process the image and text input
35
+ inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
36
+
37
+ # Generate the answer using the model
38
+ generated_ids = model.generate(
39
+ input_ids=inputs["input_ids"],
40
+ pixel_values=inputs["pixel_values"],
41
+ max_new_tokens=1024,
42
+ early_stopping=False,
43
+ do_sample=False,
44
+ num_beams=3,
45
+ )
46
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
47
+ parsed_answer = processor.post_process_generation(
48
+ generated_text, task=task_prompt, image_size=(image.width, image.height)
49
+ )
50
+
51
+ # Return the parsed answer
52
+ return parsed_answer