Vitrous committed (verified)
Commit 5e2b380 · 1 parent: 8f09f84

Update app.py

Files changed (1): app.py (+3 −4)
app.py CHANGED
@@ -5,13 +5,10 @@ import os
 import torch
 import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
-from fastapi.middleware.cors import CORSMiddleware
-
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 torch.cuda.empty_cache()
 torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed
 
-app = FastAPI(root_path="/api/v1")
 
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
@@ -39,7 +36,9 @@ def load_model_norm():
 
 model, tokenizer = load_model_norm()
 
-
+# Now we can init the FastAPI app
+app = FastAPI(root_path="/api/v1")
+
 # Function to generate a response using the model
 
 def generate_response(prompt: str) -> str:
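
For context, here is a minimal sketch of how the top of app.py reads once this commit is applied. It is an assumption-laden reconstruction, not the file itself: the hunks start at line 5, so the import of FastAPI is presumed to live in the unshown lines 1-4, and the body of load_model_norm() is a hypothetical stand-in since only its call site appears in the diff.

import os
import torch
from fastapi import FastAPI  # assumed: the import itself sits outside the visible hunks
from transformers import AutoModelForCausalLM, AutoTokenizer

# Let the CUDA caching allocator grow segments on demand instead of
# pre-reserving them, then cap this process at 80% of GPU memory.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()
torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed

# Load the model and tokenizer
model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"

def load_model_norm():
    # Hypothetical body: the diff shows only this function's call site.
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto")
    return model, tokenizer

model, tokenizer = load_model_norm()

# Now we can init the FastAPI app
app = FastAPI(root_path="/api/v1")

Because all of this runs at module import time, the server only begins answering requests after the GPTQ weights have loaded; and with root_path="/api/v1", the app expects to sit behind a reverse proxy that forwards requests from that prefix.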