Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ from transformers import (
|
|
6 |
AutoModelForCausalLM,
|
7 |
TextIteratorStreamer,
|
8 |
pipeline,
|
|
|
9 |
)
|
10 |
from threading import Thread
|
11 |
|
@@ -21,13 +22,19 @@ from threading import Thread
|
|
21 |
model_name_or_path = "TheBloke/phi-2-GPTQ"
|
22 |
# To use a different branch, change revision
|
23 |
# For example: revision="gptq-4bit-32g-actorder_True"
|
|
|
|
|
|
|
|
|
24 |
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
|
25 |
device_map="cpu",
|
26 |
trust_remote_code=True,
|
27 |
-
revision="main")
|
|
|
28 |
|
29 |
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
30 |
|
|
|
31 |
# Text generation pipeline
|
32 |
phi2 = pipeline(
|
33 |
"text-generation",
|
|
|
6 |
AutoModelForCausalLM,
|
7 |
TextIteratorStreamer,
|
8 |
pipeline,
|
9 |
+
AutoConfig,
|
10 |
)
|
11 |
from threading import Thread
|
12 |
|
|
|
22 |
model_name_or_path = "TheBloke/phi-2-GPTQ"
|
23 |
# To use a different branch, change revision
|
24 |
# For example: revision="gptq-4bit-32g-actorder_True"
|
25 |
+
|
26 |
+
config = AutoConfig.from_pretrained(model_name_or_path)
|
27 |
+
config.quantization_config["disable_exllama"] = True
|
28 |
+
|
29 |
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
|
30 |
device_map="cpu",
|
31 |
trust_remote_code=True,
|
32 |
+
revision="main",
|
33 |
+
config=config)
|
34 |
|
35 |
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
36 |
|
37 |
+
|
38 |
# Text generation pipeline
|
39 |
phi2 = pipeline(
|
40 |
"text-generation",
|