added envs
Browse files
app.py
CHANGED
@@ -17,12 +17,17 @@ _lock = threading.Lock()
|
|
17 |
|
18 |
SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT') or "Ты — русскоязычный автоматический ассистент. Ты максимально точно отвечаешь на запросы пользователя, используя русский язык."
|
19 |
CONTEXT_SIZE = os.environ.get('CONTEXT_SIZE') or 500
|
20 |
-
HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR') or '/
|
21 |
USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT') or False
|
22 |
ENABLE_GPU = os.environ.get('ENABLE_GPU') or False
|
23 |
GPU_LAYERS = os.environ.get('GPU_LAYERS') or 0
|
24 |
N_GQA = os.environ.get('N_GQA') or None #must be set to 8 for 70b models
|
25 |
CHAT_FORMAT = os.environ.get('CHAT_FORMAT') or 'llama-2'
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Create a lock object
|
28 |
lock = threading.Lock()
|
@@ -39,16 +44,14 @@ last_request_time = datetime.now()
|
|
39 |
|
40 |
# Initialize the model when the application starts
|
41 |
#model_path = "../models/model-q4_K.gguf" # Replace with the actual model path
|
42 |
-
#
|
43 |
|
44 |
-
#
|
45 |
-
#
|
46 |
|
47 |
#repo_name = "IlyaGusev/saiga2_70b_gguf"
|
48 |
-
#
|
49 |
|
50 |
-
repo_name = "IlyaGusev/saiga2_7b_gguf"
|
51 |
-
model_name = "model-q4_K.gguf"
|
52 |
local_dir = '.'
|
53 |
|
54 |
if os.path.isdir('/data'):
|
@@ -56,14 +59,12 @@ if os.path.isdir('/data'):
|
|
56 |
|
57 |
model = None
|
58 |
|
59 |
-
MODEL_PATH = snapshot_download(repo_id=
|
60 |
app.logger.info('Model path: ' + MODEL_PATH)
|
61 |
|
62 |
-
DATASET_REPO_URL = "https://huggingface.co/datasets/muryshev/saiga-chat"
|
63 |
-
DATA_FILENAME = "data-saiga-cuda-release.xml"
|
64 |
DATA_FILE = os.path.join("dataset", DATA_FILENAME)
|
65 |
|
66 |
-
|
67 |
app.logger.info("hfh: "+huggingface_hub.__version__)
|
68 |
|
69 |
# repo = Repository(
|
@@ -170,8 +171,9 @@ if __name__ == "__main__":
|
|
170 |
|
171 |
init_model()
|
172 |
|
173 |
-
app.run(host="0.0.0.0", port=7860, debug=True, threaded=True)
|
174 |
-
|
175 |
scheduler = BackgroundScheduler()
|
176 |
scheduler.add_job(check_last_request_time, trigger='interval', minutes=1)
|
177 |
-
scheduler.start()
|
|
|
|
|
|
|
|
17 |
|
18 |
SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT') or "Ты — русскоязычный автоматический ассистент. Ты максимально точно отвечаешь на запросы пользователя, используя русский язык."
|
19 |
CONTEXT_SIZE = os.environ.get('CONTEXT_SIZE') or 500
|
20 |
+
HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR') or '/home/user/app/.cache'
|
21 |
USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT') or False
|
22 |
ENABLE_GPU = os.environ.get('ENABLE_GPU') or False
|
23 |
GPU_LAYERS = os.environ.get('GPU_LAYERS') or 0
|
24 |
N_GQA = os.environ.get('N_GQA') or None #must be set to 8 for 70b models
|
25 |
CHAT_FORMAT = os.environ.get('CHAT_FORMAT') or 'llama-2'
|
26 |
+
REPO_NAME = os.environ.get('REPO_NAME') or 'IlyaGusev/saiga2_7b_gguf'
|
27 |
+
MODEL_NAME = os.environ.get('MODEL_NAME') or 'model-q4_K.gguf'
|
28 |
+
DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL') or "https://huggingface.co/datasets/muryshev/saiga-chat"
|
29 |
+
DATA_FILENAME = os.environ.get('DATA_FILENAME') or "data-saiga-cuda-release.xml"
|
30 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
31 |
|
32 |
# Create a lock object
|
33 |
lock = threading.Lock()
|
|
|
44 |
|
45 |
# Initialize the model when the application starts
|
46 |
#model_path = "../models/model-q4_K.gguf" # Replace with the actual model path
|
47 |
+
#MODEL_NAME = "model/ggml-model-q4_K.gguf"
|
48 |
|
49 |
+
#REPO_NAME = "IlyaGusev/saiga2_13b_gguf"
|
50 |
+
#MODEL_NAME = "model-q4_K.gguf"
|
51 |
|
52 |
#repo_name = "IlyaGusev/saiga2_70b_gguf"
|
53 |
+
#MODEL_NAME = "ggml-model-q4_1.gguf"
|
54 |
|
|
|
|
|
55 |
local_dir = '.'
|
56 |
|
57 |
if os.path.isdir('/data'):
|
|
|
59 |
|
60 |
model = None
|
61 |
|
62 |
+
MODEL_PATH = snapshot_download(repo_id=REPO_NAME, allow_patterns=MODEL_NAME, cache_dir=HF_CACHE_DIR) + '/' + MODEL_NAME
|
63 |
app.logger.info('Model path: ' + MODEL_PATH)
|
64 |
|
|
|
|
|
65 |
DATA_FILE = os.path.join("dataset", DATA_FILENAME)
|
66 |
|
67 |
+
|
68 |
app.logger.info("hfh: "+huggingface_hub.__version__)
|
69 |
|
70 |
# repo = Repository(
|
|
|
171 |
|
172 |
init_model()
|
173 |
|
|
|
|
|
174 |
scheduler = BackgroundScheduler()
|
175 |
scheduler.add_job(check_last_request_time, trigger='interval', minutes=1)
|
176 |
+
scheduler.start()
|
177 |
+
|
178 |
+
app.run(host="0.0.0.0", port=7860, debug=True, threaded=True)
|
179 |
+
|