Update README.md
README.md
````diff
@@ -54,7 +54,6 @@ Total code execution time on the L4 was about 45 minutes.
 
 The same code as the Google Colab notebook is shown below.
 ```python
-
 !pip install -q transformers==4.46.3 accelerate bitsandbytes
 !pip install -q tqdm
 !pip install flash-attn --no-build-isolation
@@ -74,9 +73,8 @@ print("2. Click 'New secret'")
 print("3. Enter 'HF_TOKEN' as the name")
 print("4. Enter your Hugging Face token as the value and save")
 print("Place elyza-tasks-100-TV_0.jsonl in the Files tab")
-print("The output will be stored in the newly created
+print("The output will be stored in the newly created output file")
 
-# Get HF_TOKEN from the Colab secrets
 from google.colab import userdata
 HF_TOKEN = userdata.get('HF_TOKEN')
 
@@ -91,6 +89,9 @@ quantization_config = BitsAndBytesConfig(
 )
 
 def load_model_and_tokenizer():
+    """
+    Download the model and tokenizer in parallel, then load the checkpoint
+    """
     model_id = "Chrom256/gemma-2-9b-it-lora_20241216_033631"
     base_model_id = "google/gemma-2-9b"
     downloaded_components = {"model": None, "tokenizer": None}
@@ -126,21 +127,25 @@ def load_model_and_tokenizer():
         with download_lock:
             downloaded_components["tokenizer"] = tokenizer
 
+
     torch.cuda.empty_cache()
 
-
+
     with ThreadPoolExecutor(max_workers=2) as executor:
         model_future = executor.submit(download_base_model)
         tokenizer_future = executor.submit(download_tokenizer)
 
+
         model_future.result()
         tokenizer_future.result()
 
     model = downloaded_components["model"]
     tokenizer = downloaded_components["tokenizer"]
 
+
    torch.cuda.empty_cache()
 
+
     try:
         adapter_path = model_id
         print(f"Loading adapter from {adapter_path}")
@@ -150,9 +155,11 @@ def load_model_and_tokenizer():
         print(f"Error loading adapter: {e}")
         raise
 
+
     model.config.use_cache = True
     model.eval()
 
+
     torch.cuda.empty_cache()
 
     return model, tokenizer
@@ -174,6 +181,7 @@ def run_inference(model, tokenizer, tokenized_inputs, generation_config, batch_s
             """ for item in batch
         ]
 
+
         inputs = tokenizer(
             prompts,
             padding=True,
@@ -197,6 +205,7 @@ def run_inference(model, tokenizer, tokenized_inputs, generation_config, batch_s
             elif 'model' in response:
                 response = response.split('model')[-1].strip()
 
+
             response = post_process_output(response)
 
             results.append({
@@ -205,6 +214,7 @@ def run_inference(model, tokenizer, tokenized_inputs, generation_config, batch_s
                 "output": response
             })
 
+
         del outputs, inputs
         torch.cuda.empty_cache()
 
````
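The hunks above only touch fragments of `load_model_and_tokenizer()`. For readers following along outside the notebook, here is a minimal, self-contained sketch of the pattern those fragments belong to: the quantized base model and its tokenizer are fetched in parallel with a `ThreadPoolExecutor`, and the LoRA adapter is attached afterwards with PEFT. The quantization values, the helper bodies, and the token handling below are illustrative assumptions, not the notebook's exact code.

```python
# A minimal sketch of the loading pattern the hunks above modify.
# Quantization values and helper bodies are assumptions, not the notebook's exact code.
# Assumes `pip install peft`, which is not shown in the install lines of this diff.
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# In Colab this comes from userdata.get('HF_TOKEN'); here we read an env var instead.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Assumed 4-bit NF4 settings; the notebook defines its own quantization_config above hunk 3.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

def load_model_and_tokenizer():
    """Download the base model and tokenizer in parallel, then attach the LoRA adapter."""
    model_id = "Chrom256/gemma-2-9b-it-lora_20241216_033631"  # adapter repository
    base_model_id = "google/gemma-2-9b"                       # base model repository
    downloaded_components = {"model": None, "tokenizer": None}
    download_lock = threading.Lock()

    def download_base_model():
        model = AutoModelForCausalLM.from_pretrained(
            base_model_id,
            quantization_config=quantization_config,
            device_map="auto",
            token=HF_TOKEN,
        )
        with download_lock:
            downloaded_components["model"] = model

    def download_tokenizer():
        tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=HF_TOKEN)
        with download_lock:
            downloaded_components["tokenizer"] = tokenizer

    # Fetch both components concurrently, mirroring the ThreadPoolExecutor hunk.
    with ThreadPoolExecutor(max_workers=2) as executor:
        model_future = executor.submit(download_base_model)
        tokenizer_future = executor.submit(download_tokenizer)
        model_future.result()
        tokenizer_future.result()

    model = downloaded_components["model"]
    tokenizer = downloaded_components["tokenizer"]
    torch.cuda.empty_cache()

    # Attach the LoRA adapter on top of the quantized base model.
    model = PeftModel.from_pretrained(model, model_id)
    model.config.use_cache = True
    model.eval()
    torch.cuda.empty_cache()
    return model, tokenizer
```

Running the two downloads concurrently mostly hides network latency; the lock only protects the shared dictionary, since each worker writes a different key.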
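The `run_inference` hunks are similarly fragmentary. The sketch below shows the inferred batch loop: build a prompt per task, tokenize with padding, generate, decode, keep only the model turn (the split on 'model' seen in the diff), post-process, and free GPU buffers between batches. The prompt template, generation settings, the `post_process_output` body, the `batch_size` default, and the `task_id`/`input` field names are assumptions made for illustration; the field names follow the elyza-tasks-100-TV JSONL layout.

```python
# A minimal sketch of the batched-inference loop the later hunks modify.
# Prompt format, generation settings, and post-processing are assumptions.
import torch
from tqdm import tqdm

def post_process_output(text):
    # Placeholder for the notebook's post-processing; here it only trims whitespace.
    return text.strip()

def run_inference(model, tokenizer, tokenized_inputs, generation_config, batch_size=2):
    results = []
    for start in tqdm(range(0, len(tokenized_inputs), batch_size)):
        batch = tokenized_inputs[start:start + batch_size]

        # Assumed Gemma-style chat prompt; the notebook uses its own f-string template.
        prompts = [
            f"<start_of_turn>user\n{item['input']}<end_of_turn>\n<start_of_turn>model\n"
            for item in batch
        ]

        inputs = tokenizer(
            prompts,
            padding=True,
            return_tensors="pt",
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=generation_config.get("max_new_tokens", 512),
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id,
            )

        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        for item, response in zip(batch, decoded):
            # Keep only the model's turn, mirroring the split on 'model' in the diff.
            if "model" in response:
                response = response.split("model")[-1].strip()
            response = post_process_output(response)
            results.append({
                "task_id": item["task_id"],  # assumed elyza-tasks-100-TV field name
                "output": response,
            })

        # Free GPU memory between batches, as in the final hunk.
        del outputs, inputs
        torch.cuda.empty_cache()

    return results
```

The `del outputs, inputs` followed by `torch.cuda.empty_cache()` after every batch is presumably there to keep peak VRAM low on the Colab L4, at the cost of some allocator churn.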