yottan-wywy committed on
Commit
6b138da
·
verified ·
1 Parent(s): 9484d07

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +65 -1
README.md CHANGED
@@ -18,12 +18,76 @@ base_model:
18
  - trl==0.12.2
19
  - transformers<4.47.0
20
  - tokenizers==0.21.0
 
 
 
21
 
22
  ## Usage
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ```py
25
  results = []
26
- system_text = "以下は、タスクを説明する指示です。要求を適切に満たす回答を**簡潔に**書いてください。回答の後ろに、回答の理由を**1文で**書いてください。"
27
  for data in tqdm(datasets):
28
 
29
  input_text = data["input"]
 
18
  - trl==0.12.2
19
  - transformers<4.47.0
20
  - tokenizers==0.21.0
21
+ - bitsandbytes==0.45.0
22
+ - peft==0.14.0
23
+ - datasets==3.2.0
24
 
25
  ## Usage
26
 
27
+ Google Colaboratory(L4 GPU)にて実行
28
+
29
+ ```py
30
+ from transformers import (
31
+ AutoModelForCausalLM,
32
+ AutoTokenizer,
33
+ BitsAndBytesConfig,
34
+ TrainingArguments,
35
+ logging,
36
+ )
37
+ from peft import (
38
+ LoraConfig,
39
+ PeftModel,
40
+ get_peft_model,
41
+ )
42
+
43
+ import os, torch, gc, json
44
+ from tqdm import tqdm
45
+ from datasets import load_dataset
46
+ import bitsandbytes as bnb
47
+ from trl import SFTTrainer
48
+ from google.colab import userdata
49
+
50
+ # Hugging Face Token
51
+ os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
52
+ os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")
53
+ ```
54
+
55
+ ```py
56
+
57
+ # 推論データ準備
58
+ datasets = []
59
+
60
+ inference_data_path = '/content/drive/MyDrive/your_path'
61
+ with open(f"{inference_data_path}/elyza-tasks-100-TV_0.jsonl", "r") as f:
62
+ item = ""
63
+ for line in f:
64
+ line = line.strip()
65
+ item += line
66
+ if item.endswith("}"):
67
+ datasets.append(json.loads(item))
68
+ item = ""
69
+
70
+ # モデルとトークナイザー準備
71
+ new_model_id = "yottan-wywy/llm-jp-3-13b-instruct-finetune_1217"
72
+
73
+ bnb_config = BitsAndBytesConfig(
74
+ load_in_4bit=True,
75
+ bnb_4bit_quant_type="nf4",
76
+ bnb_4bit_compute_dtype=torch.bfloat16,
77
+ )
78
+
79
+ model = AutoModelForCausalLM.from_pretrained(
80
+ new_model_id,
81
+ quantization_config=bnb_config,
82
+ device_map="auto"
83
+ )
84
+
85
+ tokenizer = AutoTokenizer.from_pretrained(new_model_id, trust_remote_code=True)
86
+ ```
87
+
88
  ```py
89
  results = []
90
+ system_text = "以下は、タスクを説明する指示です。要求を適切に満たす回答を**簡潔に**書きなさい。"
91
  for data in tqdm(datasets):
92
 
93
  input_text = data["input"]