Vitrous committed
Commit 88182e3 · verified · 1 parent: ace0225

Update app.py

Files changed (1)
  1. app.py +5 -2
app.py CHANGED
@@ -2,16 +2,19 @@ import uvicorn
 from fastapi import FastAPI, HTTPException, Request
 from auto_gptq import AutoGPTQForCausalLM
 import os
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 import torch
 import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
 from fastapi.middleware.cors import CORSMiddleware
 
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+torch.cuda.empty_cache()
+torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed
+
 app = FastAPI(title="Deploying FastAPI Apps on Huggingface")
 app.add_middleware(CORSMiddleware, allow_origins=['*'], allow_methods=['*'], allow_headers=['*'],)
 
-
+
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 # Dictionary to store conversation threads and their context
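For context, the added lines configure PyTorch's CUDA allocator before the GPTQ model is loaded. Below is a minimal standalone sketch of the same pattern, not the app's code: the torch.cuda.is_available() guard is added here so the snippet also runs on CPU-only machines, and the 0.8 fraction is simply the value this commit uses; adjust it for the target GPU.

import os

# PYTORCH_CUDA_ALLOC_CONF is read when the CUDA caching allocator initializes,
# so it must be set before the first CUDA allocation is made.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

import torch

if torch.cuda.is_available():
    # Return cached, unused memory blocks from the allocator to the driver.
    torch.cuda.empty_cache()
    # Cap this process at ~80% of the device's total memory; allocations past
    # the cap raise a CUDA out-of-memory error instead of growing further.
    torch.cuda.set_per_process_memory_fraction(0.8)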