Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -6,14 +6,14 @@ from pynvml import *
|
|
6 |
nvmlInit()
|
7 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
8 |
ctx_limit = 1024
|
9 |
- title = "RWKV-4-Pile-7B-Instruct-
|
10 |
|
11 |
os.environ["RWKV_JIT_ON"] = '1'
|
12 |
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
|
13 |
|
14 |
from rwkv.model import RWKV
|
15 |
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-7b", filename=f"{title}.pth")
|
16 |
- model = RWKV(model=model_path, strategy='cuda fp16i8 *
|
17 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
18 |
pipeline = PIPELINE(model, "20B_tokenizer.json")
|
19 |
|
|
|
6 |
nvmlInit()
|
7 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
8 |
ctx_limit = 1024
|
9 |
+ title = "RWKV-4-Pile-7B-Instruct-test5-20230329-ctx4096"
|
10 |
|
11 |
os.environ["RWKV_JIT_ON"] = '1'
|
12 |
os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
|
13 |
|
14 |
from rwkv.model import RWKV
|
15 |
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-7b", filename=f"{title}.pth")
|
16 |
+ model = RWKV(model=model_path, strategy='cuda fp16i8 *8 -> cuda fp16')
|
17 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
18 |
pipeline = PIPELINE(model, "20B_tokenizer.json")
|
19 |
|