tomo1222 committed
Commit dbf0e6e · verified · 1 Parent(s): 509f14b

Update README.md

Files changed (1)
  1. README.md +107 -3
README.md CHANGED
- unsloth
- gemma2
- trl
license: gemma
language:
- jp
datasets:
- llm-jp/magpie-sft-v1.0
- tomo1222/Japanese-QA111dataset
---

# Uploaded model

- **Developed by:** tomo1222
- **License:** Gemma
- **Finetuned from model:** tomo1222/gemma-2-27b-bf16-4bit

This gemma2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
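
The training script itself is not part of this commit. As a rough orientation only, the snippet below is a minimal, illustrative Unsloth + TRL QLoRA sketch of the kind of setup implied above: the rank/alpha values are guessed from the repo name (`r64_alpha64`), the target modules, dataset placeholder, and hyperparameters are assumptions, and the `SFTTrainer` keyword arguments follow the older TRL signature used in the Unsloth example notebooks (newer TRL releases move some of them into `SFTConfig`).

```python
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import Dataset

# Base checkpoint named in this card; 4-bit loading keeps the 27B model on a single GPU.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="tomo1222/gemma-2-27b-bf16-4bit",
    max_seq_length=4096,
    load_in_4bit=True,
)

# LoRA adapters; r=64 / lora_alpha=64 are inferred from the repo name, not confirmed.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    lora_alpha=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

# Placeholder data: in practice this would be the datasets listed in the metadata,
# formatted into a single "text" field per example.
train_dataset = Dataset.from_list([
    {"text": "### 質問:\n...\n\n### 回答:\n..."},
])

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=4096,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        num_train_epochs=1,
        learning_rate=2e-4,
        output_dir="outputs",
    ),
)
trainer.train()
```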

# Output code

## Library
```bash
pip install unsloth
pip install --no-deps --upgrade "flash-attn>=2.6.3"
pip install -U ragatouille
pip install fugashi unidic-lite
```
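
Before loading the 27B model, a quick sanity check that the packages above resolved and a GPU is visible can save a long wait; this snippet is illustrative only and not part of the original pipeline.

```python
# Quick environment check for the packages installed above (illustrative only).
import torch
import unsloth      # noqa: F401 - fast loading/inference patches
import ragatouille  # noqa: F401 - ColBERT-based retrieval used below
import fugashi      # noqa: F401 - Japanese tokenizer backend for JaColBERT

print("CUDA available:", torch.cuda.is_available())
print("GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "none")
```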

### Inference sample
```python
from unsloth import FastLanguageModel
import json

from huggingface_hub import login
from google.colab import userdata  # running in Colab: read the HF token from user secrets
login(userdata.get('HFtoken'))

# Evaluation tasks (one JSON object per line, each with an "input" field)
with open("elyza-tasks-100-TV_0.jsonl", "r", encoding="utf-8") as f:
    tasks = [json.loads(l) for l in f]

model_name = "tomo1222/Gemma2-27b-ft-jp-r64_alpha64"
max_seq_length = 4096

# Load the fine-tuned model in 4-bit for single-GPU inference
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

# Gemma-2 style chat template (user/model turns); the prompts below are built as raw text instead
tokenizer.chat_template = """
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}
"""
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Reference QA pairs used as few-shot examples (fields: "input", "output")
with open("Japanese-QA111dataset.jsonl", "r", encoding="utf-8") as f:
    ref_tasks = [json.loads(l) for l in f]
ref_tasks_input = [task["input"] for task in ref_tasks]

# Map each reference question to its answer (and keep the question text itself)
dic = {}
dic_input = {}
for i, task in enumerate(ref_tasks):
    dic[ref_tasks_input[i]] = task["output"]
    dic_input[ref_tasks_input[i]] = task["input"]

# 2. Load the retriever (RAG) and index the reference questions
from ragatouille import RAGPretrainedModel

RAG = RAGPretrainedModel.from_pretrained("bclavie/JaColBERTv2")
RAG.encode(ref_tasks_input)

def search_ref_input(query, k=10):
    """Retrieve the k most similar reference QA pairs and format them as few-shot examples."""
    retrieved = RAG.search_encoded_docs(query=query, k=k)
    print(retrieved)
    # Instruction header: "Read the question/text carefully and write an accurate, helpful answer."
    text = "質問・文章をよく読んで、正確で親切な回答を書きなさい。\n"
    for data in retrieved[::-1]:  # reverse order: the most relevant example ends up closest to the question
        key = data["content"]
        ref_output = dic[key]
        ref_input = dic_input[key]
        # "### 質問:" = question, "### 回答:" = answer
        text += "### 質問:\n" + ref_input + "\n\n### 回答:\n" + ref_output + "\n\n\n"
    return text

# Prompt construction and generation
output_data = []

for i, task in enumerate(tasks):
    # Retrieved few-shot examples for this task, followed by the task itself
    text = search_ref_input(task["input"], 16) + f"### 質問:\n{task['input']}\n\n### 回答:\n"
    print(task["input"])
    inputs = tokenizer(text, return_tensors="pt").to("cuda")
    print(len(inputs["input_ids"][0]))
    # Block markup-like tokens so the model answers in plain text
    output = model.generate(
        **inputs,
        max_new_tokens=1024,
        repetition_penalty=1.2,
        use_cache=True,
        bad_words_ids=[
            tokenizer.encode("質問", add_special_tokens=False),
            tokenizer.encode("###", add_special_tokens=False),
            tokenizer.encode("#", add_special_tokens=False),
            tokenizer.encode("##", add_special_tokens=False),
            tokenizer.encode("---", add_special_tokens=False),
            tokenizer.encode("<h3>", add_special_tokens=False),
            tokenizer.encode("filepath", add_special_tokens=False),
            tokenizer.encode("> ", add_special_tokens=False),
        ],
    )

    # Decode only the newly generated tokens
    output_text = tokenizer.decode(output[0][inputs.input_ids.size(1):], skip_special_tokens=True).strip()
    print(i, output_text)
    print("---")
    output_data.append({"task_id": i, "output": output_text})

# Write one JSON object per line: {"task_id": ..., "output": ...}
with open("output.jsonl", "w", encoding="utf-8") as f:
    for result in output_data:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")
```
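
For reference, the script above assumes both input files are JSONL with an `"input"` field (the reference set also has an `"output"` field) and writes `output.jsonl` with one result per line. The snippet below is just an illustration of reading the results back; the file names and fields are taken from the script.

```python
import json

# Expected shapes (inferred from the script above):
#   elyza-tasks-100-TV_0.jsonl  : {"input": "..."}                    one task per line
#   Japanese-QA111dataset.jsonl : {"input": "...", "output": "..."}   one QA pair per line
#   output.jsonl                : {"task_id": 0, "output": "..."}     one result per line
with open("output.jsonl", "r", encoding="utf-8") as f:
    results = [json.loads(line) for line in f]

for r in results[:3]:
    print(r["task_id"], r["output"][:80])
```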