Artix

Runtime error

Vitrous committed on Feb 18, 2024

Commit

cb06e39

verified ·

1 Parent(s): 5cd64cd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,6 +11,9 @@ os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 torch.cuda.empty_cache()
 torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 conversations = {}
@@ -34,8 +37,6 @@ def load_model_norm():
 model, tokenizer = load_model_norm()
-# Initialize FastAPI application
-app = FastAPI(root_path="/api/v1")
 def generate_response(msg_prompt: str) -> dict:
     """

 torch.cuda.empty_cache()
 torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed
+# Initialize FastAPI application
+app = FastAPI(root_path="/api/v1")
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 conversations = {}
 model, tokenizer = load_model_norm()
 def generate_response(msg_prompt: str) -> dict:
     """