Update app.py
app.py CHANGED
@@ -42,6 +42,8 @@ base_model = AutoModelForCausalLM.from_pretrained(
 )
 
 model = PeftModel.from_pretrained(base_model, "lakshyaag/llama38binstruct_summarize")
+model.merge_and_unload()
+
 
 # Move model to GPU if available
 if torch.cuda.is_available():
@@ -95,9 +97,7 @@ async def main(message: cl.Message):
     )
 
     # decode output from tokenized output to str output
-    decoded_output = tokenizer.batch_decode(generated_ids
-
-    print(decoded_output)
+    decoded_output = tokenizer.batch_decode(generated_ids)
 
     # return only the generated response (not the prompt) as output
     response = decoded_output[0].split("<|end_header_id|>")[-1]
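For context, below is a minimal, self-contained sketch of the pattern this commit lands on: load the base model, attach the LoRA adapter, fold the adapter weights in with merge_and_unload(), then generate and decode. The base checkpoint name, dtype, prompt, and generation settings are illustrative assumptions and do not come from app.py; only the adapter id and the decode/split logic are taken from the diff.

# Sketch only: names marked "assumed" are not from app.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"  # assumed base checkpoint (not shown in this hunk)
ADAPTER = "lakshyaag/llama38binstruct_summarize"     # adapter id from the diff

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16)

# Wrap the base model with the fine-tuned LoRA adapter, then merge the
# adapter weights into the base weights so inference runs on a plain model.
model = PeftModel.from_pretrained(base_model, ADAPTER)
model = model.merge_and_unload()

# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to("cuda")

messages = [{"role": "user", "content": "Summarize the following meeting notes: ..."}]  # placeholder prompt
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

generated_ids = model.generate(input_ids, max_new_tokens=256)

# Decode token ids back to text, then keep only the generated reply
# (everything after the last <|end_header_id|> marker), as app.py does.
decoded_output = tokenizer.batch_decode(generated_ids)
response = decoded_output[0].split("<|end_header_id|>")[-1]
print(response)

Note that merge_and_unload() returns the merged base model; assigning its return value, as in the sketch, is the usual pattern, whereas the commit calls it in place on the PeftModel.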