Update README.md
README.md (changed)
The Hugging Face token is now read from Colab's `userdata` secrets rather than pasted inline, and the base model ID is set explicitly (the contents of the two removed lines are collapsed in this view):

```diff
@@ -39,9 +39,10 @@ from tqdm import tqdm
 import json
 
 # Paste the token obtained from Hugging Face here.
-
+from google.colab import userdata
+HF_TOKEN = userdata.get('HF_TOKEN')
 
-
+model_id = "llm-jp/llm-jp-3-13b"
 adapter_id = "totsukash/llm-jp-3-13b-finetune"
 
 # QLoRA config
```
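For orientation, the `# QLoRA config` marker and the next hunk's header line (`model = PeftModel.from_pretrained(model, adapter_id, token = HF_TOKEN)`) imply that the base model is loaded 4-bit quantized before the adapter is attached. A minimal sketch of that setup, assuming standard `transformers` and `peft` usage; the exact `BitsAndBytesConfig` values are assumptions, since the diff does not show them:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# QLoRA config (assumed values; the actual config sits outside the diff)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized base model and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    token=HF_TOKEN,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)

# Attach the fine-tuned adapter; this line appears verbatim in the next hunk's header.
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
```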
The evaluation dataset path is updated for Colab (the old path is recoverable from the third hunk's header):

```diff
@@ -68,7 +69,7 @@ model = PeftModel.from_pretrained(model, adapter_id, token = HF_TOKEN)
 # Load the dataset.
 # (Set the path to the evaluation dataset's jsonl file.)
 datasets = []
-with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
+with open("/content/elyza-tasks-100-TV_0.jsonl", "r") as f:
     item = ""
     for line in f:
         line = line.strip()
```
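Only the head of the loading loop appears in the hunk. A minimal sketch of how such a line-accumulating jsonl reader typically finishes; the `item += line` and `json.loads` steps are assumptions, since the diff cuts off before them:

```python
import json

datasets = []
with open("/content/elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        # Assumed: parse once a full JSON object has been accumulated.
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
```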
The gemma-style inference loop is filled in; the prompt keeps the Japanese headers the fine-tuned model expects (指示 = instruction, 回答 = answer). The twelve removed lines are collapsed in this view:

```diff
@@ -80,18 +81,18 @@ with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
 # gemma
 results = []
 for data in tqdm(datasets):
-
-
-
-
-
-
-
-
-
-
-
-
+    input = data["input"]
+    prompt = f"""### 指示
+{input}
+### 回答
+"""
+
+    # Extract and use only the input_ids
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+    outputs = model.generate(input_ids, max_new_tokens=512, do_sample=False, repetition_penalty=1.2)
+    output = tokenizer.decode(outputs[0][input_ids.size(1):], skip_special_tokens=True)
+
+    results.append({"task_id": data["task_id"], "input": input, "output": output})
 
 # llmjp
 results = []
```
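On the design of the new loop: `do_sample=False` makes decoding greedy and deterministic, and `repetition_penalty=1.2` discourages the model from looping. The diff ends before any output code, so the following dump of `results` to a jsonl file, including the filename, is an assumption sketched only to show where the collected records would typically go:

```python
import json

# Hypothetical output step (not in the diff): one JSON object per line.
with open("output.jsonl", "w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")
```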