yilunzhao committed on
Commit
ebe6532
1 Parent(s): 7eb73dd

Update app.py

Files changed (1)
  1. app.py +5 -6
app.py CHANGED
@@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load model and tokenizer if a GPU is available
 if torch.cuda.is_available():
-    model_id = "allenai/OLMo-7B-Instruct"
+    model_id = "allenai/OLMo-7B-hf"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 else:
@@ -16,13 +16,12 @@ else:
 @spaces.GPU
 def generate_response(passage: str, question: str) -> str:
     # Prepare the input text by combining the passage and question
-    chat = [{"role": "user", "content": f"Passage: {passage}\nQuestion: {question}"}]
+    message = [f"Passage: {passage}\nQuestion: {question}"]
+    inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
 
-    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
-    response = model.generate(input_ids=inputs.to(model.device), max_new_tokens=100)
+    response = model.generate(**inputs, max_new_tokens=100)
 
-    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0].split("<|assistant|>")[-1].strip()
+    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
 
 
     return response
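
For reference, the generation path after this commit is sketched below as a minimal, hypothetical standalone script, not the Space's exact code: it assumes a CUDA device is available, and it adds an explicit move of the input tensors to the model's device, which the committed diff omits but which is generally needed when the tokenizer returns CPU tensors while the model is dispatched with device_map="auto". The return_token_type_ids=False flag matches the diff; the OLMo tokenizer otherwise returns token_type_ids, which model.generate() does not accept.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "allenai/OLMo-7B-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

def generate_response(passage: str, question: str) -> str:
    # OLMo-7B-hf is a base (non-instruct) model, so the prompt is plain text
    # rather than a chat template.
    message = [f"Passage: {passage}\nQuestion: {question}"]
    inputs = tokenizer(message, return_tensors="pt", return_token_type_ids=False)
    # Assumption (not in the committed code): move inputs to the model's
    # device before generating, since tokenizer output defaults to CPU.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    output_ids = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]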