Artix

Runtime error

Vitrous committed on Feb 15, 2024

Commit

88182e3

verified ·

1 Parent(s): ace0225

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,16 +2,19 @@ import uvicorn
 from fastapi import FastAPI, HTTPException, Request
 from auto_gptq import AutoGPTQForCausalLM
 import os
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
 import torch
 import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
 from fastapi.middleware.cors import CORSMiddleware
 app = FastAPI(title="Deploying FastAPI Apps on Huggingface")
 app.add_middleware(CORSMiddleware, allow_origins=['*'], allow_methods=['*'], allow_headers=['*'],)
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 # Dictionary to store conversation threads and their context

 from fastapi import FastAPI, HTTPException, Request
 from auto_gptq import AutoGPTQForCausalLM
 import os
 import torch
 import optimum
 from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
 from fastapi.middleware.cors import CORSMiddleware
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+torch.cuda.empty_cache()
+torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed
 app = FastAPI(title="Deploying FastAPI Apps on Huggingface")
 app.add_middleware(CORSMiddleware, allow_origins=['*'], allow_methods=['*'], allow_headers=['*'],)
 # Load the model and tokenizer
 model_name_or_path = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
 # Dictionary to store conversation threads and their context