put the model on CPU, because there's no GPU
app.py CHANGED
@@ -15,7 +15,7 @@ processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
 model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
                                                          torch_dtype=torch.bfloat16,
                                                          #_attn_implementation="flash_attention_2",
-                                                         trust_remote_code=True)
+                                                         trust_remote_code=True)#.to("cuda")
 
 BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
 EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
@@ -50,7 +50,9 @@ def model_inference(
 
     prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[images], return_tensors="pt")
-    inputs = {k: v.to("cuda") for k, v in inputs.items()}
+    # inputs = {k: v.to("cuda") for k, v in inputs.items()}
+    inputs = {k: v for k, v in inputs.items()}
+
 
     generation_args = {
         "max_new_tokens": max_new_tokens,
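For context, the commit hard-codes CPU execution by commenting out both `.to("cuda")` calls. A more durable alternative is to pick the device once at startup so the same app runs with or without a GPU. The sketch below is a minimal illustration of that pattern, not the Space's actual code; the `device` variable and its use are assumptions introduced here:

import torch
from transformers import AutoProcessor, Idefics3ForConditionalGeneration

# Use the GPU when one is present, otherwise fall back to CPU
# (the situation this commit addresses).
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
model = Idefics3ForConditionalGeneration.from_pretrained(
    "HuggingFaceM4/Idefics3-8B-Llama3",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to(device)

# ... later, inside model_inference(), move the processed tensors
# to the same device as the model:
# inputs = {k: v.to(device) for k, v in inputs.items()}

With this pattern, neither the model-loading line nor the input-moving dict comprehension needs to be edited when the hardware changes.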