Update app.py
app.py CHANGED
@@ -5,13 +5,10 @@ import os
 import torch
 import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
-from fastapi.middleware.cors import CORSMiddleware
-
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 torch.cuda.empty_cache()
 torch.cuda.set_per_process_memory_fraction(0.8) # Adjust the fraction as needed
 
-app = FastAPI(root_path="/api/v1")
-
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
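Note: this hunk drops the `from fastapi.middleware.cors import CORSMiddleware` import. If app.py still registers that middleware anywhere below, the name is now undefined and the app would raise a NameError at startup; if cross-origin requests are still needed, the import has to come back. A minimal sketch of re-enabling it (the origin list is a placeholder assumption, not taken from this commit):

from fastapi.middleware.cors import CORSMiddleware

# Hypothetical re-registration; the origins below are placeholders,
# not values from this commit. Tighten allow_origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)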
@@ -39,7 +36,9 @@ def load_model_norm():
 
 model, tokenizer = load_model_norm()
 
-
+# Now we can init the FastAPI app
+app = FastAPI(root_path="/api/v1")
+
 # Function to generate a response using the model
 
 def generate_response(prompt: str) -> str:
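The body of load_model_norm() sits outside the changed hunks, so it is not shown in this diff. Given the imports above and the GPTQ checkpoint name, a minimal sketch of what such a loader typically looks like (the device_map and use_fast arguments are assumptions, not taken from this commit):

def load_model_norm():
    # transformers (with optimum and auto-gptq installed) can load GPTQ
    # weights directly through AutoModelForCausalLM.
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",  # assumption: spread layers across available GPUs
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    return model, tokenizer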
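Likewise, only the signature of generate_response appears in the diff. Since pipeline and GenerationConfig are imported, a plausible sketch of the body (the sampling parameters are illustrative assumptions, not from this commit):

def generate_response(prompt: str) -> str:
    # Assumption: a text-generation pipeline over the already-loaded
    # model and tokenizer; building it once at module level would be
    # cheaper than rebuilding it per call.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,  # illustrative values
        do_sample=True,
        temperature=0.7,
    )
    return pipe(prompt)[0]["generated_text"]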
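With app = FastAPI(root_path="/api/v1") now created after the model is loaded, route handlers defined below it can call generate_response directly. A hypothetical endpoint (the path and request shape are assumptions, not part of this commit):

from pydantic import BaseModel

class PromptRequest(BaseModel):
    prompt: str

@app.post("/generate")  # hypothetical route, not shown in this diff
def generate(req: PromptRequest) -> dict:
    return {"response": generate_response(req.prompt)}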