BK-Lee committed on
Commit c3599fc
1 Parent(s): 07c8c90
Files changed (1)
  1. app.py +45 -40
app.py CHANGED
@@ -59,49 +59,53 @@ def threading_function(inputs, image_token_number, streamer, device, temperature
 @spaces.GPU
 def bot_streaming(message, history, temperature, new_max_token, top_p):
 
-    # param
-    for param in mmamba.parameters():
-        param.data = param.to(accel.device)
-    for param in meteor.parameters():
-        param.data = param.to(accel.device)
-
-    # prompt type -> input prompt
-    image_token_number = int((490/14)**2)
-    if len(message['files']) != 0:
-        # Image Load
-        image = F.interpolate(pil_to_tensor(Image.open(message['files'][0]).convert("RGB")).unsqueeze(0), size=(490, 490), mode='bicubic').squeeze(0)
-        inputs = [{'image': image, 'question': message['text']}]
-    else:
-        inputs = [{'question': message['text']}]
-
-    # [4] Meteor Generation
-    with torch.inference_mode():
-        # kwargs
-        streamer = TextIteratorStreamer(tok_meteor, skip_special_tokens=True)
-
-        # Threading generation
-        thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
-                                                               image_token_number=image_token_number,
-                                                               streamer=streamer,
-                                                               device=accel.device,
-                                                               temperature=temperature,
-                                                               new_max_token=new_max_token,
-                                                               top_p=top_p))
-        thread.start()
-
-        # generated text
-        generated_text = ""
-        for new_text in streamer:
-            generated_text += new_text
-        generated_text
-
-        # Text decoding
-        response = generated_text.split('assistant\n')[-1].split('[U')[0].strip()
+    try:
+        # param
+        for param in mmamba.parameters():
+            param.data = param.to(accel.device)
+        for param in meteor.parameters():
+            param.data = param.to(accel.device)
+
+        # prompt type -> input prompt
+        image_token_number = int((490/14)**2)
+        if len(message['files']) != 0:
+            # Image Load
+            image = F.interpolate(pil_to_tensor(Image.open(message['files'][0]).convert("RGB")).unsqueeze(0), size=(490, 490), mode='bicubic').squeeze(0)
+            inputs = [{'image': image, 'question': message['text']}]
+        else:
+            inputs = [{'question': message['text']}]
+
+        # [4] Meteor Generation
+        with torch.inference_mode():
+            # kwargs
+            streamer = TextIteratorStreamer(tok_meteor, skip_special_tokens=True)
+
+            # Threading generation
+            thread = Thread(target=threading_function, kwargs=dict(inputs=inputs,
+                                                                   image_token_number=image_token_number,
+                                                                   streamer=streamer,
+                                                                   device=accel.device,
+                                                                   temperature=temperature,
+                                                                   new_max_token=new_max_token,
+                                                                   top_p=top_p))
+            thread.start()
+
+            # generated text
+            generated_text = ""
+            for new_text in streamer:
+                generated_text += new_text
+            generated_text
+
+            # Text decoding
+            response = generated_text.split('assistant\n')[-1].split('[U')[0].strip()
+
+    except:
+        response = "There are no supported something: ex) pdf, video, sound, or any other unsupported multimodal format. only single image is supported in this version."
 
     buffer = ""
     for character in response:
         buffer += character
-        time.sleep(0.02)
+        time.sleep(0.015)
         yield buffer
 
 demo = gr.ChatInterface(fn=bot_streaming,
@@ -109,6 +113,7 @@ demo = gr.ChatInterface(fn=bot_streaming,
                         additional_inputs_accordion="Generation Hyperparameters",
                         theme=gr.themes.Soft(),
                         title="☄️Meteor",
-                        description="Meteor is efficient 7B size Large Language and Vision Model built on the help of traversal of rationale",
+                        description="Meteor is efficient 7B size Large Language and Vision Model built on the help of traversal of rationale"
+                                    "Its inference speed highly depends on assinging non-scheduled GPU (Therefore, once all GPUs are busy, then inference may be taken in infinity)",
                         stop_btn="Stop Generation", multimodal=True)
 demo.launch()
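
For context, the image branch above resizes the input to 490x490 and derives the visual token count from a 14-pixel patch size. A minimal standalone sketch of that preprocessing ("example.jpg" is a hypothetical path; the cast to float is added here because bicubic interpolation is not implemented for uint8 tensors on older PyTorch versions):

import torch.nn.functional as F
from PIL import Image
from torchvision.transforms.functional import pil_to_tensor

# Load and resize the same way bot_streaming does; "example.jpg" is a stand-in.
image = pil_to_tensor(Image.open("example.jpg").convert("RGB")).unsqueeze(0)
image = F.interpolate(image.float(), size=(490, 490), mode='bicubic').squeeze(0)

# 490x490 pixels at a 14-pixel patch size -> (490/14)**2 = 1225 visual tokens.
image_token_number = int((490 / 14) ** 2)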
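
The pattern this commit hardens, running generation on a worker thread, draining a TextIteratorStreamer, and falling back to a fixed message on failure, works with any Hugging Face causal LM. A minimal sketch, assuming a stand-in "gpt2" checkpoint in place of the Meteor models, and narrowing the commit's bare except: to except Exception so interrupts still propagate:

from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")           # stand-in for tok_meteor
model = AutoModelForCausalLM.from_pretrained("gpt2")  # stand-in for meteor

def bot_streaming(message, history):
    try:
        inputs = tok(message['text'], return_tensors='pt')
        streamer = TextIteratorStreamer(tok, skip_special_tokens=True)
        # generate() blocks, so it runs on a worker thread while the streamer
        # yields decoded text chunks as tokens are produced.
        thread = Thread(target=model.generate,
                        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128))
        thread.start()
        response = ""
        for new_text in streamer:
            response += new_text
    except Exception:
        # Mirror the commit's fallback: report unsupported input instead of
        # raising inside the chat turn.
        response = "Only a single image plus text is supported."

    buffer = ""
    for character in response:
        buffer += character
        yield buffer

demo = gr.ChatInterface(fn=bot_streaming, multimodal=True)
demo.launch()

As in the commit, the full response is accumulated before being re-yielded character by character; the time.sleep(0.015) in app.py simply paces that replay, and lowering it from 0.02 makes the typewriter effect faster.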