Spaces:
Sleeping
Sleeping
Benjamin Gonzalez
committed on
Commit
·
fb38431
1
Parent(s):
def73e8
flash attention is not possible
Browse files
app.py
CHANGED
@@ -1,8 +1,9 @@
|
|
|
|
1 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
2 |
import gradio as gr
|
3 |
|
4 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
|
5 |
-
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto",
|
6 |
|
7 |
def generate(prompt, length):
|
8 |
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
|
|
|
1 |
+
import torch
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
3 |
import gradio as gr
|
4 |
|
5 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
|
6 |
+
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", device_map="cuda", trust_remote_code=True)
|
7 |
|
8 |
def generate(prompt, length):
|
9 |
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
|