Vitrous committed on
Commit 5b66768 · verified · 1 Parent(s): 2ee547c

Update app.py

Files changed (1):
app.py +17 -1
app.py CHANGED
@@ -10,7 +10,7 @@ if torch.cuda.is_available():
 else:
     print("CUDA is not available. CPU will be used.")
 # Load the model and tokenizer
-model_name_or_path = "/kaggle/input/vicuna/"
+model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 # Dictionary to store conversation threads and their context
 conversations = {}
 Device_Type = "cuda"
@@ -37,6 +37,22 @@ def load_quantized_model(model_id, model_basename):
 model, tokenizer = load_quantized_model("/kaggle/input/vicuna/", "model.safetensors")
 
 
+def load_model_norm():
+    if torch.cuda.is_available():
+        print("CUDA is available. GPU will be used.")
+    else:
+        print("CUDA is not available. CPU will be used.")
+    # Load the model directly from the Hugging Face Hub
+    model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
+    # To use a different branch, change revision
+    # For example: revision="main"
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto", trust_remote_code=True, revision="main")
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+    return model, tokenizer
+
+
 @app.get("/")
 async def read_root():
     return {"message": "Welcome to Eren Bot!"}
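For context, here is a minimal self-contained sketch of the loader this commit adds. The imports of torch and the transformers auto classes are assumptions (app.py presumably already has them earlier in the file, outside this diff), as is the call site at the bottom; loading a GPTQ checkpoint this way also assumes the accelerate package (for device_map="auto") and GPTQ support (optimum/auto-gptq) are installed.

    # Sketch only: imports and call site are assumed, not shown in the diff.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_model_norm():
        # Report whether inference will run on GPU or fall back to CPU.
        if torch.cuda.is_available():
            print("CUDA is available. GPU will be used.")
        else:
            print("CUDA is not available. CPU will be used.")
        # Load the quantized model directly from the Hugging Face Hub.
        model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
        # revision selects a branch of the Hub repo; "main" is the default.
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            device_map="auto",        # needs accelerate; spreads weights across devices
            trust_remote_code=True,
            revision="main",
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
        return model, tokenizer

    # Assumed usage: load once at startup, then reuse for every request.
    model, tokenizer = load_model_norm()

Pulling the model from the Hub rather than the hard-coded /kaggle/input/vicuna/ path is what frees the app from the Kaggle filesystem, which appears to be the point of this commit.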