Vitrous committed on
Commit 5b66768 · verified · 1 Parent(s): 2ee547c

Update app.py

Files changed (1):
app.py +17 -1
app.py CHANGED
@@ -10,7 +10,7 @@ if torch.cuda.is_available():
 else:
     print("CUDA is not available. CPU will be used.")
 # Load the model and tokenizer
-model_name_or_path = "/kaggle/input/vicuna/"
+model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 # Dictionary to store conversation threads and their context
 conversations = {}
 Device_Type = "cuda"
@@ -37,6 +37,22 @@ def load_quantized_model(model_id, model_basename):
 model, tokenizer = load_quantized_model("/kaggle/input/vicuna/", "model.safetensors")
 
 
+def load_model_norm():
+    if torch.cuda.is_available():
+        print("CUDA is available. GPU will be used.")
+    else:
+        print("CUDA is not available. CPU will be used.")
+    # Load the model directly from the Hugging Face Hub
+    model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
+    # To use a different branch, change revision
+    # For example: revision="main"
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto", trust_remote_code=True, revision="main")
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+    return model, tokenizer
+
+
 @app.get("/")
 async def read_root():
     return {"message": "Welcome to Eren Bot!"}
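For context, here is a minimal self-contained sketch of the loader this commit adds. The imports of torch and the transformers auto classes are assumptions (app.py presumably already has them earlier in the file, outside this diff), as is the call site at the bottom; loading a GPTQ checkpoint this way also assumes the accelerate package (for device_map="auto") and GPTQ support (optimum/auto-gptq) are installed.

    # Sketch only: imports and call site are assumed, not shown in the diff.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_model_norm():
        # Report whether inference will run on GPU or fall back to CPU.
        if torch.cuda.is_available():
            print("CUDA is available. GPU will be used.")
        else:
            print("CUDA is not available. CPU will be used.")
        # Load the quantized model directly from the Hugging Face Hub.
        model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
        # revision selects a branch of the Hub repo; "main" is the default.
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            device_map="auto",        # needs accelerate; spreads weights across devices
            trust_remote_code=True,
            revision="main",
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
        return model, tokenizer

    # Assumed usage: load once at startup, then reuse for every request.
    model, tokenizer = load_model_norm()

Pulling the model from the Hub rather than the hard-coded /kaggle/input/vicuna/ path is what frees the app from the Kaggle filesystem, which appears to be the point of this commit.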