wsj1995 committed
Commit 6bbb120
1 parent: f8bc393

Update app.py

Files changed (1)
  1. app.py +4 -3
app.py CHANGED
@@ -16,11 +16,13 @@ model_name = "Qwen/Qwen2.5-7B-Instruct"
 # )
 # model = Qwen2VLForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True, torch_dtype="auto").cuda().eval()
 # model = Qwen2VLForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).to("cuda").eval()
+
+# tokenizer = AutoTokenizer.from_pretrained(model_name)
+
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
+    "Qwen/Qwen2-VL-72B-Instruct-AWQ", torch_dtype="auto", device_map="auto"
 )
 processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 @spaces.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
@@ -36,7 +38,6 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
             ],
         }
     ]
-
     # Preparation for inference
     text = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
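
For readers who want to try the checkpoint this commit switches to outside the Space, here is a minimal text-only sketch of the same loading pattern: Qwen2VLForConditionalGeneration with device_map="auto", AutoProcessor, and apply_chat_template. It is not the Space's stream_chat implementation. The prompt, the max_new_tokens value, and the decoding step are illustrative assumptions, loading the AWQ checkpoint also assumes the autoawq package is installed, and unlike the Space (which loads its processor from a separate model_name) the sketch loads the processor from the VL checkpoint itself so it is self-contained.

# Sketch only: mirrors the model-loading change in this commit, not the full app.py.
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

model_name = "Qwen/Qwen2-VL-72B-Instruct-AWQ"  # checkpoint introduced by this commit

# device_map="auto" spreads the weights across available GPUs;
# the AWQ checkpoint additionally needs the autoawq package installed.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# A text-only chat message (the Space builds this inside stream_chat).
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Summarize what AWQ quantization does in one sentence."},
        ],
    }
]

# Same preparation-for-inference pattern as in the diff above.
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = processor(text=[text], padding=True, return_tensors="pt").to(model.device)

generated_ids = model.generate(**inputs, max_new_tokens=256)
# Drop the prompt tokens so only the newly generated text is decoded.
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])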