chocopan committed
Commit aa71ddf · verified · 1 Parent(s): c52fbf2

Update README.md

Files changed (1):
  1. README.md +119 -0

README.md CHANGED
@@ -33,6 +33,125 @@ Replace HF_TOKEN and WB_TOKEN with your own values.<br>
  `--elyza-tasks-100-TV_0.jsonl
  ```
 
+ ### Usage
+ Execute the following code in Google Colab.
+
+ ```python
+ !pip install -U pip
+ !pip install -U transformers
+ !pip install -U bitsandbytes
+ !pip install -U accelerate
+ !pip install -U datasets
+ !pip install -U peft
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import PeftModel
+ import torch
+ import bitsandbytes as bnb  # import bitsandbytes (backend for 4-bit loading)
+ import json
+ import re
+ from collections import Counter  # used for the word-overlap F1 below
+ from tqdm import tqdm
+
+ # IDs of the base model and the LoRA adapter
+ model_id = "llm-jp/llm-jp-3-13b"
+ adapter_id = "chocopan/llm-jp-3-13b-finetune-4bit"
+
+ # Load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ # Load the model with 4-bit quantization
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,               # enable 4-bit quantization
+     bnb_4bit_use_double_quant=True,  # double quantization for further memory savings
+     bnb_4bit_quant_type="nf4",       # NF4 is the recommended quantization type
+     bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     quantization_config=bnb_config,  # pass the 4-bit settings via BitsAndBytesConfig
+     torch_dtype=torch.bfloat16,      # keep non-quantized weights in bfloat16
+     device_map="auto"                # let accelerate place the layers
+ )
+
+ # Load the LoRA adapter
+ model = PeftModel.from_pretrained(model, adapter_id)
+ model.eval()
+
+ # Load the task data.
+ # Upload the file to the Colab session beforehand.
+ datasets = []
+ with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
+     item = ""
+     for line in f:
+         line = line.strip()
+         item += line
+         if item.endswith("}"):
+             datasets.append(json.loads(item))
+             item = ""
+
+
+ # Run inference
+ results = []
+ for dt in tqdm(datasets):
+     input_text = dt["input"]
+     prompt = f"""### 指示
+ {input_text}
+ ### 回答
+ """
+     inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
+     # Remove token_type_ids from inputs if present
+     if "token_type_ids" in inputs:
+         del inputs["token_type_ids"]
+     outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True, do_sample=False, repetition_penalty=1.2)
+     prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
+     prediction = re.sub(r"[*#]", "", prediction).strip()  # drop markup characters and surrounding whitespace
+     results.append({
+         "task_id": dt.get("task_id", None),  # handle records without a task_id
+         "input": input_text,
+         "prediction": prediction,
+         "expected": dt.get("output", None)   # reference answer, if present
+     })
+
+ # Evaluation
+ exact_match_count = 0
+ total_count = 0
+ f1_scores = []
+
+ for result in results:
+     if result["expected"] is None:  # skip records without a reference answer
+         continue
+
+     total_count += 1
+     expected = result["expected"].strip()      # strip surrounding whitespace
+     prediction = result["prediction"].strip()  # strip surrounding whitespace
+
+     if prediction == expected:
+         exact_match_count += 1
+
+     # Word-level F1 score (token overlap between prediction and reference)
+     expected_words = expected.split()
+     prediction_words = prediction.split()
+
+     if len(expected_words) == 0 and len(prediction_words) == 0:
+         f1 = 1.0  # both empty: count as a perfect match
+     elif len(expected_words) == 0 or len(prediction_words) == 0:
+         f1 = 0.0  # only one side empty: no overlap
+     else:
+         common = Counter(expected_words) & Counter(prediction_words)
+         num_same = sum(common.values())
+         if num_same == 0:
+             f1 = 0.0
+         else:
+             precision = num_same / len(prediction_words)
+             recall = num_same / len(expected_words)
+             f1 = 2 * precision * recall / (precision + recall)
+     f1_scores.append(f1)
+
+ # Print the evaluation results
+ exact_match_rate = exact_match_count / total_count if total_count > 0 else 0
+ average_f1 = sum(f1_scores) / len(f1_scores) if len(f1_scores) > 0 else 0
+
+ print(f"Exact Match Rate: {exact_match_rate:.4f}")
+ print(f"Average F1 Score: {average_f1:.4f}")
+
+ # Save the results (with the exact-match flag) as JSONL
+ json_file_id = re.sub(".*/", "", adapter_id)
+ with open(f"/content/{json_file_id}_output.jsonl", 'w', encoding='utf-8') as f:
+     for result in results:
+         if result["expected"] is not None:
+             result["exact_match"] = 1 if result["prediction"].strip() == result["expected"].strip() else 0
+         else:
+             result["exact_match"] = None
+         f.write(json.dumps(result, ensure_ascii=False) + '\n')
+ ```
+
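+ As an optional follow-up (a minimal sketch, not part of the evaluation itself), the saved JSONL can be pulled out of the Colab session with the standard `google.colab.files` helper; the path simply reuses the output file written above.
+
+ ```python
+ from google.colab import files  # Colab-only helper for browser downloads
+
+ # Download the evaluation output written by the script above
+ files.download(f"/content/{json_file_id}_output.jsonl")
+ ```
+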
  ## Training Details
  ```
  training_arguments = TrainingArguments(