lakshyaag committed on
Commit
20c7176
·
verified ·
1 Parent(s): f40f065

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -42,6 +42,8 @@ base_model = AutoModelForCausalLM.from_pretrained(
42
  )
43
 
44
  model = PeftModel.from_pretrained(base_model, "lakshyaag/llama38binstruct_summarize")
 
 
45
 
46
  # Move model to GPU if available
47
  if torch.cuda.is_available():
@@ -95,9 +97,7 @@ async def main(message: cl.Message):
95
  )
96
 
97
  # decode output from tokenized output to str output
98
- decoded_output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
99
-
100
- print(decoded_output)
101
 
102
  # return only the generated response (not the prompt) as output
103
  response = decoded_output[0].split("<|end_header_id|>")[-1]
 
42
  )
43
 
44
  model = PeftModel.from_pretrained(base_model, "lakshyaag/llama38binstruct_summarize")
45
+ model.merge_and_unload()
46
+
47
 
48
  # Move model to GPU if available
49
  if torch.cuda.is_available():
 
97
  )
98
 
99
  # decode output from tokenized output to str output
100
+ decoded_output = tokenizer.batch_decode(generated_ids)
 
 
101
 
102
  # return only the generated response (not the prompt) as output
103
  response = decoded_output[0].split("<|end_header_id|>")[-1]