OsakanaTeishoku
committed on
Commit
•
a3cd682
1
Parent(s):
74785a1
Update README.md
Browse files
README.md
CHANGED
@@ -48,15 +48,7 @@ from jinja2 import Template
|
|
48 |
print(vllm.__version__)
|
49 |
|
50 |
MAX_LENGTH = 1024
|
51 |
-
# BATCH_SIZE = 2
|
52 |
-
# MODEL_NAME = "/kaggle/input/gemma/transformers/2b-it/3"
|
53 |
-
# CHECKPOINT_PATH = "/kaggle/input/atmacup17-train-causal/checkpoint-468"
|
54 |
-
#MODEL_NAME = "/kaggle/input/gemma2-9b-it-for-t4"
|
55 |
-
#CHECKPOINT_PATH = "/kaggle/input/exp002-gemma-2-9b-it"
|
56 |
-
#MODEL_NAME = "team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-AWQ"
|
57 |
-
#MODEL_NAME = "llm-jp/llm-jp-3-3.7b-instruct"
|
58 |
MODEL_NAME = "llm-jp/llm-jp-3-13b"
|
59 |
-
#MODEL_NAME = "OsakanaTeishoku/1127-13b-magpie-sft-step170"
|
60 |
print(MODEL_NAME)
|
61 |
|
62 |
import os
|
@@ -64,16 +56,13 @@ os.environ["HF_TOKEN"] = "あなたのHugging Faceトークン"
|
|
64 |
|
65 |
from vllm.lora.request import LoRARequest
|
66 |
llm = vllm.LLM(
|
67 |
-
MODEL_NAME,
|
68 |
tensor_parallel_size=1, # 2, 4
|
69 |
gpu_memory_utilization=0.95,
|
70 |
trust_remote_code=True,
|
71 |
-
#enable_lora=True,
|
72 |
-
#dtype="half",
|
73 |
enforce_eager=True,
|
74 |
max_model_len=MAX_LENGTH,
|
75 |
enable_lora=True,
|
76 |
-
#quantization="AWQ",
|
77 |
quantization="bitsandbytes",
|
78 |
load_format="bitsandbytes"
|
79 |
)
|
|
|
48 |
print(vllm.__version__)
|
49 |
|
50 |
MAX_LENGTH = 1024
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
MODEL_NAME = "llm-jp/llm-jp-3-13b"
|
|
|
52 |
print(MODEL_NAME)
|
53 |
|
54 |
import os
|
|
|
56 |
|
57 |
from vllm.lora.request import LoRARequest
|
58 |
llm = vllm.LLM(
|
59 |
+
MODEL_NAME,
|
60 |
tensor_parallel_size=1, # 2, 4
|
61 |
gpu_memory_utilization=0.95,
|
62 |
trust_remote_code=True,
|
|
|
|
|
63 |
enforce_eager=True,
|
64 |
max_model_len=MAX_LENGTH,
|
65 |
enable_lora=True,
|
|
|
66 |
quantization="bitsandbytes",
|
67 |
load_format="bitsandbytes"
|
68 |
)
|