Update app.py
app.py
CHANGED
@@ -26,7 +26,7 @@ def load_model_norm():
     model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
     # To use a different branch, change revision
     # For example: revision="main"
-    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map="auto", trust_remote_code=True,revision="gptq-4bit-
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map="auto", trust_remote_code=True,revision="gptq-4bit-128g-actorder_True")
     # Switch to CPU inference
     #model.to("cuda")
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
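For context, a minimal sketch of how the surrounding load_model_norm() likely reads after this change. The import line, the multi-line argument formatting, and the final return statement are assumptions; the return is inferred from the call site model, tokenizer = load_model_norm() shown in the second hunk, and AutoModelForCausalLM / AutoTokenizer are assumed to come from transformers.

from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model_norm():
    model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
    # To use a different branch, change revision
    # For example: revision="main"
    # Load the GPTQ-quantized weights from the 4-bit, 128-group-size branch
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",
        trust_remote_code=True,
        revision="gptq-4bit-128g-actorder_True",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    return model, tokenizer  # assumed: matches the call site in the second hunk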
@@ -40,7 +40,7 @@ model, tokenizer = load_model_norm()
 app = FastAPI(root_path="/api/v1")
 
 #Generates a response from the model
-def generate_response(prompt: str) ->
+def generate_response(prompt: str) -> str:
     # Define the user prompt
     user_prompt = f'USER: {prompt}'
 
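The completed signature suggests generate_response builds a Vicuna-style prompt and returns the decoded generation as a string. Below is a hedged sketch of how the rest of the body might look; everything past the user_prompt line (the ASSISTANT continuation, the max_new_tokens=256 budget, and the decode step) is an assumption based on the usual pattern for this model family, not code shown in this commit.

def generate_response(prompt: str) -> str:
    # Define the user prompt
    user_prompt = f'USER: {prompt}'
    # Assumed: Vicuna chat format, where the model completes the ASSISTANT turn
    full_prompt = f'{user_prompt} ASSISTANT:'
    input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(model.device)
    output_ids = model.generate(input_ids, max_new_tokens=256)  # hypothetical token budget
    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True).strip()

A FastAPI route on app (rooted at /api/v1) would presumably call generate_response, but the commit does not show any endpoint definitions, so that wiring is omitted here.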