Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
6 |
|
7 |
# Load model and tokenizer if a GPU is available
|
8 |
if torch.cuda.is_available():
|
9 |
-
model_id = "allenai/OLMo-7B-
|
10 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
12 |
else:
|
@@ -16,13 +16,12 @@ else:
|
|
16 |
@spaces.GPU
|
17 |
def generate_response(passage: str, question: str) -> str:
|
18 |
# Prepare the input text by combining the passage and question
|
19 |
-
|
|
|
20 |
|
21 |
-
|
22 |
-
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
|
23 |
-
response = model.generate(input_ids=inputs.to(model.device), max_new_tokens=100)
|
24 |
|
25 |
-
response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
|
26 |
|
27 |
|
28 |
return response
|
|
|
6 |
|
7 |
# Load model and tokenizer if a GPU is available
|
8 |
if torch.cuda.is_available():
|
9 |
+
model_id = "allenai/OLMo-7B-hf"
|
10 |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
12 |
else:
|
|
|
16 |
@spaces.GPU
def generate_response(passage: str, question: str) -> str:
    """Generate a model response to ``question`` given ``passage``.

    The passage and question are combined into one prompt, tokenized, and
    passed to ``model.generate`` for at most 100 new tokens.

    Args:
        passage: Context text to include in the prompt.
        question: Question to include in the prompt after the passage.

    Returns:
        The first decoded sequence returned by ``model.generate``, with
        special tokens stripped.
        NOTE(review): the whole generated sequence is decoded, so the
        returned text presumably still contains the prompt - confirm
        whether callers expect only the continuation.
    """
    # Prepare the input text by combining the passage and question
    message = [f"Passage: {passage}\nQuestion: {question}"]
    inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
    # The model is loaded with device_map="auto", so the tokenized tensors
    # (created on CPU) must be moved to the model's device before generating.
    inputs = inputs.to(model.device)

    response = model.generate(**inputs, max_new_tokens=100)

    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]

    return response
|