ffreemt committed
Commit 039548e
1 Parent(s): 128e040
Update
m3_server.py: +10 -3
m3_server.py
CHANGED
@@ -15,9 +15,9 @@ from pydantic import BaseModel
 from starlette.status import HTTP_504_GATEWAY_TIMEOUT
 
 Path("/tmp/cache").mkdir(exist_ok=True)
-os.environ[
-
-
+os.environ["HF_HOME"] = "/tmp/cache"
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/cache"
+# does not quite work, need Path("/tmp/cache").mkdir(exist_ok=True)?
 
 batch_size = 2  # gpu batch_size in order of your available vram
 max_request = 10  # max request for future improvements on api calls / gpu batches (for now is pretty basic)
@@ -31,6 +31,13 @@ port = 3000
 port = 7860
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
+os.environ["TZ"] = "Asia/Shanghai"
+try:
+    time.tzset()  # type: ignore  # pylint: disable=no-member
+except Exception:
+    # Windows
+    print("Windows, cant run time.tzset()")
+
 
 class m3Wrapper:
     def __init__(self, model_name: str, device: str = DEVICE):
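For context, the two hunks follow a common pattern on Hugging Face Spaces: point the HF cache at a writable /tmp path and pin the process time zone. Below is a minimal consolidated sketch of that pattern; the env-var names and values are taken from the diff, while the cache_dir variable and the hasattr check are illustrative, not part of the commit.

    import os
    import time
    from pathlib import Path

    # Create the cache directory before pointing the env vars at it; the file
    # already does this via Path("/tmp/cache").mkdir(exist_ok=True) a few
    # lines up, which is what the in-diff comment is asking about.
    cache_dir = Path("/tmp/cache")
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Set these before transformers is imported so the cache location is
    # picked up when the library resolves its cache path.
    os.environ["HF_HOME"] = str(cache_dir)
    os.environ["TRANSFORMERS_CACHE"] = str(cache_dir)  # legacy name, kept for older transformers versions

    # time.tzset() only exists on Unix; on Windows the attribute is absent,
    # which is the error the diff's bare except clause is catching.
    os.environ["TZ"] = "Asia/Shanghai"
    if hasattr(time, "tzset"):
        time.tzset()
    else:
        print("time.tzset() is unavailable on this platform (e.g. Windows)")

Since time.tzset() is Unix-only, the try/except in the committed code catches an AttributeError on Windows; the hasattr check above expresses the same intent explicitly.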