intervention-demo-finetuned

Sleeping

yilunzhao committed on Nov 7

Commit

bf2f5f2

•

1 Parent(s): 03e0db5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,14 +19,14 @@ else:
 @spaces.GPU
 def generate_response(passage: str, question: str) -> str:
     # Prepare the input text by combining the passage and question
-    chat = [{"role": "user", "content": f"Passage: {passage}\nQuestion: {question}"}]
-    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
-    response = model.generate(input_ids=inputs.to(model.device), max_new_tokens=100)
-    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0].split("<|assistant|>")[-1].strip()
     return response

 @spaces.GPU
 def generate_response(passage: str, question: str) -> str:
     # Prepare the input text by combining the passage and question
+    message = [f"Passage: {passage}\nQuestion: {question}"]
+    inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False).to('cuda')
+    response = model.generate(**inputs, max_new_tokens=100)
+    response = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
+    response = response[len(message[0]):].strip().split('\n')[0]
     return response