Update app.py
Browse files
app.py
CHANGED
@@ -9,14 +9,24 @@ from transformers import (
|
|
9 |
)
|
10 |
from threading import Thread
|
11 |
|
12 |
-
|
13 |
-
checkpoint = "microsoft/phi-2"
|
14 |
|
15 |
-
|
16 |
-
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
|
17 |
-
model = AutoModelForCausalLM.from_pretrained(
|
18 |
-
checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
|
19 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Text generation pipeline
|
22 |
phi2 = pipeline(
|
@@ -80,9 +90,9 @@ with gr.Blocks() as demo:
|
|
80 |
gr.Markdown(
|
81 |
"""
|
82 |
# Phi-2 Chatbot Demo
|
83 |
-
This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
|
84 |
|
85 |
-
In order to reduce the response time on this hardware, `max_new_tokens`
|
86 |
"""
|
87 |
)
|
88 |
|
|
|
9 |
)
|
10 |
from threading import Thread
|
11 |
|
12 |
+
## The huggingface model id for Microsoft's phi-2 model
|
13 |
+
#checkpoint = "microsoft/phi-2"
|
14 |
|
15 |
+
## Download and load model and tokenizer
|
16 |
+
#tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
|
17 |
+
#model = AutoModelForCausalLM.from_pretrained(
|
18 |
+
# checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
|
19 |
+
#)
|
20 |
+
|
21 |
+
# Hugging Face hub id of the GPTQ-quantized build of phi-2 to download.
model_name_or_path = "TheBloke/phi-2-GPTQ"

# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    trust_remote_code=True,
    revision="main",
)

# Matching tokenizer for the quantized checkpoint (fast Rust tokenizer).
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
30 |
|
31 |
# Text generation pipeline
|
32 |
phi2 = pipeline(
|
|
|
90 |
gr.Markdown(
|
91 |
"""
|
92 |
# Phi-2 Chatbot Demo
|
93 |
+
This chatbot was created using TheBloke/phi-2-GPTQ, a quantized version of Microsoft's 2.7-billion-parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
|
94 |
|
95 |
+
In order to reduce the response time on this hardware, set `max_new_tokens` to a lower number in the text-generation pipeline.
|
96 |
"""
|
97 |
)
|
98 |
|