Zaherrr committed
Commit a7af971
1 Parent(s): c96e7bd

put the model on CPU, because there's no GPU

Files changed (1): app.py (+4 −2)
app.py CHANGED
@@ -15,7 +15,7 @@ processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
 model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
         torch_dtype=torch.bfloat16,
         #_attn_implementation="flash_attention_2",
-        trust_remote_code=True).to("cuda")
+        trust_remote_code=True)#.to("cuda")
 
 BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
 EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
@@ -50,7 +50,9 @@ def model_inference(
 
     prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[images], return_tensors="pt")
-    inputs = {k: v.to("cuda") for k, v in inputs.items()}
+    # inputs = {k: v.to("cuda") for k, v in inputs.items()}
+    inputs = {k: v for k, v in inputs.items()}
+
 
     generation_args = {
         "max_new_tokens": max_new_tokens,