Csplk committed on
Commit
3068721
β€’
1 Parent(s): 04e1dd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -17
app.py CHANGED
@@ -2,38 +2,43 @@ import spaces
2
  import torch
3
  import re
4
  import gradio as gr
5
- from transformers import AutoTokenizer, AutoModelForCausalLM
6
- from PIL import Image
 
 
7
 
8
- if torch.cuda.is_available():
9
- device, dtype = "cuda", torch.float16
10
- else:
11
- device, dtype = "cpu", torch.float32
12
 
13
  model_id = "vikhyatk/moondream2"
14
- revision = "2024-07-23"
15
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
16
  moondream = AutoModelForCausalLM.from_pretrained(
17
- model_id, trust_remote_code=True, revision=revision, torch_dtype=dtype
18
- ).to(device=device)
 
 
19
  moondream.eval()
20
 
21
- @spaces.GPU(120)
22
  def answer_questions(image_tuples, prompt_text):
23
  result = ""
24
  Q_and_A = ""
25
  prompts = [p.strip() for p in prompt_text.split(',')]
26
  image_embeds = [img[0] for img in image_tuples if img[0] is not None]
27
-
28
- #print(f"\nprompts: {prompts}\n\n")
29
  answers = []
 
30
  for prompt in prompts:
31
- image_answers = moondream.batch_answer(
32
- images=[img.convert("RGB") for img in image_embeds],
33
- prompts=[prompt] * len(image_embeds),
34
- tokenizer=tokenizer,
 
 
35
  )
36
  answers.append(image_answers)
 
 
37
 
38
  for i, prompt in enumerate(prompts):
39
  Q_and_A += f"### Q: {prompt}\n"
@@ -43,7 +48,7 @@ def answer_questions(image_tuples, prompt_text):
43
  Q_and_A += f"**{image_name} A:** \n {answer_text} \n\n"
44
 
45
  result = {'headers': prompts, 'data': answers}
46
- #print(f"result\n{result}\n\nQ_and_A\n{Q_and_A}\n\n")
47
  return Q_and_A, result
48
 
49
  with gr.Blocks() as demo:
 
2
  import torch
3
  import re
4
  import gradio as gr
5
+ from threading import Thread
6
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
+ from PIL import ImageDraw
8
+ from torchvision.transforms.v2 import Resize
9
 
10
+ import subprocess
11
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
12
 
13
  model_id = "vikhyatk/moondream2"
14
+ revision = "2024-08-26"
15
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
16
  moondream = AutoModelForCausalLM.from_pretrained(
17
+ model_id, trust_remote_code=True, revision=revision,
18
+ torch_dtype=torch.bfloat16, device_map={"": "cuda"},
19
+ attn_implementation="flash_attention_2"
20
+ )
21
  moondream.eval()
22
 
23
+ @spaces.GPU
24
  def answer_questions(image_tuples, prompt_text):
25
  result = ""
26
  Q_and_A = ""
27
  prompts = [p.strip() for p in prompt_text.split(',')]
28
  image_embeds = [img[0] for img in image_tuples if img[0] is not None]
 
 
29
  answers = []
30
+
31
  for prompt in prompts:
32
+ thread = Thread(
33
+ image_answers = moondream.batch_answer(
34
+ images=[img.convert("RGB") for img in image_embeds],
35
+ prompts=[prompt] * len(image_embeds),
36
+ tokenizer=tokenizer
37
+ )
38
  )
39
  answers.append(image_answers)
40
+
41
+ thread.start()
42
 
43
  for i, prompt in enumerate(prompts):
44
  Q_and_A += f"### Q: {prompt}\n"
 
48
  Q_and_A += f"**{image_name} A:** \n {answer_text} \n\n"
49
 
50
  result = {'headers': prompts, 'data': answers}
51
+ print(f"result\n{result}\n\nQ_and_A\n{Q_and_A}\n\n")
52
  return Q_and_A, result
53
 
54
  with gr.Blocks() as demo: