Vitrous committed
Commit a330154 · verified · 1 Parent(s): 6b266fd

Update app.py

Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -26,7 +26,7 @@ def load_model_norm():
     model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
     # To use a different branch, change revision
     # For example: revision="main"
-    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map="auto", trust_remote_code=True,revision="gptq-4bit-32g-actorder_True")
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map="auto", trust_remote_code=True,revision="gptq-4bit-128g-actorder_True")
     # Switch to CPU inference
     #model.to("cuda")
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
@@ -40,7 +40,7 @@ model, tokenizer = load_model_norm()
 app = FastAPI(root_path="/api/v1")

 #Generates a response from the model
-def generate_response(prompt: str) -> dict:
+def generate_response(prompt: str) -> str:
     # Define the user prompt
     user_prompt = f'USER: {prompt}'
