Update app.py
app.py
CHANGED
@@ -10,7 +10,7 @@ if torch.cuda.is_available():
 else:
     print("CUDA is not available. CPU will be used.")
 # Load the model and tokenizer
-model_name_or_path = "/
+model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPT/"
 # Dictionary to store conversation threads and their context
 conversations = {}
 Device_Type = "cuda"
@@ -37,6 +37,22 @@ def load_quantized_model(model_id, model_basename):
 model, tokenizer = load_quantized_model("/kaggle/input/vicuna/", "model.safetensors")
 
 
+def load_model_norm():
+    if torch.cuda.is_available():
+        print("CUDA is available. GPU will be used.")
+    else:
+        print("CUDA is not available. CPU will be used.")
+    # Load model directly
+    model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
+    # To use a different branch, change revision
+    # For example: revision="main"
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map="auto", trust_remote_code=True,revision="main")
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+    return model, tokenizer
+
+
 @app.get("/")
 async def read_root():
     return {"message": "Welcome to Eren Bot!"}
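For context, a minimal sketch of how the model and tokenizer returned by the new load_model_norm() might be wired into the FastAPI app to serve a chat request. The /chat/ route, the ChatRequest schema, and the Vicuna-style prompt template are illustrative assumptions only; none of them are part of this commit.

# Sketch only: assumes it lives in the same app.py as load_model_norm() above.
from fastapi import FastAPI
from pydantic import BaseModel
import torch

app = FastAPI()                       # in app.py this instance already exists
model, tokenizer = load_model_norm()  # function added by this change

class ChatRequest(BaseModel):         # hypothetical request schema
    thread_id: str
    message: str

@app.post("/chat/")                   # hypothetical route, not shown in the diff
async def chat(req: ChatRequest):
    # Vicuna-style prompt; the app's actual template is not shown in this change.
    prompt = f"USER: {req.message}\nASSISTANT:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
    # Decode only the newly generated tokens, not the echoed prompt.
    reply = tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return {"thread_id": req.thread_id, "response": reply.strip()}

Loading the GPTQ checkpoint through AutoModelForCausalLM as in load_model_norm() typically also requires the optimum and auto-gptq packages to be installed alongside transformers.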