---
base_model: tomo1222/gemma-2-27b-bf16-4bit
tags:
- text-generation-inference
- transformers
- unsloth
- gemma2
- trl
license: gemma
language:
- ja
datasets:
- llm-jp/magpie-sft-v1.0
- tomo1222/Japanese-QA111dataset
---
# Uploaded model
- **Developed by:** tomo1222
- **License:** Gemma
- **Finetuned from model:** tomo1222/gemma-2-27b-bf16-4bit
tomo1222/gemma-2-27b-bf16-4bit is [google/gemma-2-27b](https://huggingface.co/google/gemma-2-27b) quantized to 4-bit with BitsAndBytes and saved as-is, so that it can be used directly with [Unsloth](https://github.com/unslothai/unsloth).
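For reference, a minimal sketch of how such a 4-bit checkpoint can be produced with BitsAndBytes (an illustration, not the author's actual script; the quantization settings shown are common defaults and are assumptions):

```python
# Hedged sketch: quantize google/gemma-2-27b to 4-bit with BitsAndBytes and save it.
# The exact settings used for tomo1222/gemma-2-27b-bf16-4bit are not documented here;
# nf4 + bfloat16 compute are typical Unsloth-compatible defaults.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b",
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b")

# Saving 4-bit weights requires bitsandbytes/transformers versions that support 4-bit serialization.
model.save_pretrained("gemma-2-27b-bf16-4bit")
tokenizer.save_pretrained("gemma-2-27b-bf16-4bit")
```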
This gemma2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
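The training script itself is not included in this card. As a rough sketch of what the Unsloth + TRL SFT setup might have looked like: LoRA `r=64` / `lora_alpha=64` are inferred from the model name, the dataset names come from the metadata above, and all other hyperparameters are placeholders.

```python
# Hedged sketch of an Unsloth + TRL LoRA fine-tuning setup; not the author's exact script.
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

max_seq_length = 4096
model, tokenizer = FastLanguageModel.from_pretrained(
    "tomo1222/gemma-2-27b-bf16-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)

# LoRA rank/alpha inferred from the fine-tuned model name (r64_alpha64).
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    lora_alpha=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

# Format the QA dataset into plain prompt/response text; how llm-jp/magpie-sft-v1.0
# was mixed in and formatted is not documented here.
def to_text(example):
    return {"text": f"### 質問:\n{example['input']}\n\n### 回答:\n{example['output']}" + tokenizer.eos_token}

train_dataset = load_dataset("tomo1222/Japanese-QA111dataset", split="train").map(to_text)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        num_train_epochs=1,
        learning_rate=2e-4,
        bf16=True,
        logging_steps=10,
    ),
)
trainer.train()
```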
# Output generation code
## Libraries
```bash
pip install unsloth
pip install --no-deps --upgrade "flash-attn>=2.6.3"
pip install -U ragatouille
pip install fugashi unidic-lite
```
### Inference code (Google Colaboratory, L4 GPU)
```python
from datasets import concatenate_datasets, load_dataset
from unsloth import FastLanguageModel
import random
import json
from huggingface_hub import login
from google.colab import userdata
login(userdata.get('HFtoken'))
with open("elyza-tasks-100-TV_0.jsonl","r",encoding='utf-8') as f:
    tasks = [json.loads(l) for l in f.readlines()]
model_name = "tomo1222/Gemma2-27b-ft-jp-r64_alpha64"
max_seq_length = 4096
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = None,          # let Unsloth pick bf16/fp16 automatically
    load_in_4bit = True,   # load the 4-bit quantized weights
)
# Chat template from google/gemma-2-9b
tokenizer.chat_template = """
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}
"""
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
dataset = load_dataset("tomo1222/Japanese-QA111dataset")
ref_tasks = list(dataset["train"])
ref_tasks_input = [task["input"] for task in ref_tasks]
# Map each reference question to its gold answer (and keep the question text itself)
dic = {}
dic_input = {}
for i, task in enumerate(ref_tasks):
    dic[ref_tasks_input[i]] = task["output"]
    dic_input[ref_tasks_input[i]] = task["input"]
"""# 2. RAGのロード"""
from ragatouille import RAGPretrainedModel
RAG = RAGPretrainedModel.from_pretrained("bclavie/JaColBERTv2")
RAG.encode(ref_tasks_input)
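# search_encoded_docs() below retrieves the top-k reference questions for a new query
# and formats them as few-shot "### 質問 / ### 回答" examples in front of the prompt.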
def search_ref_input(input, k=10):
    retrieved = RAG.search_encoded_docs(query=input, k=k)
    print(retrieved)
    # Japanese system-style instruction: "Read the question/text carefully and write an accurate, helpful answer."
    text = "質問・文章をよく読んで、正確で親切な回答を書きなさい。\n"
    for data in retrieved[::-1]:  # reverse order, so the most relevant example sits closest to the new question
        key = data["content"]
        output = dic[key]
        input = dic_input[key]
        text += "### 質問:\n" + input + "\n\n### 回答:\n" + output + "\n\n\n"
    return text
"""# Prompt"""
output_data = []
for i, task in enumerate(tasks):
    # Prepend the retrieved few-shot examples, then append the new question
    text = search_ref_input(task["input"], 16) + f"### 質問:\n{task['input']}\n\n### 回答:\n"
    print(task["input"])

    inputs = tokenizer(text, return_tensors="pt").to("cuda")
    print(len(inputs['input_ids'][0]))  # prompt length in tokens

    # Ban header/markup strings so the model answers directly instead of emitting new "### 質問" blocks
    output = model.generate(**inputs, max_new_tokens=1024, repetition_penalty=1.2, use_cache=True,
                            bad_words_ids=[tokenizer.encode("質問", add_special_tokens=False),
                                           tokenizer.encode("###", add_special_tokens=False),
                                           tokenizer.encode("#", add_special_tokens=False),
                                           tokenizer.encode("##", add_special_tokens=False),
                                           tokenizer.encode("---", add_special_tokens=False),
                                           tokenizer.encode("<h3>", add_special_tokens=False),
                                           tokenizer.encode("filepath", add_special_tokens=False),
                                           tokenizer.encode("> ", add_special_tokens=False),
                                           ]
                            )
    # Keep only the newly generated tokens (everything after the prompt)
    output_text = tokenizer.decode(output[0][inputs.input_ids.size(1):], skip_special_tokens=True).strip()
    print(i, output_text)
    print("---")
    output_data.append({"task_id": i, "output": output_text})
with open("output.jsonl","w",encoding="utf-8") as f:
for result in output_data:
json.dump(result, f, ensure_ascii=False)
f.write('\n')
```
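The script writes one JSON object per line to `output.jsonl`. A quick sanity check of the result (same file and field names as produced above) could look like this:

```python
import json

with open("output.jsonl", encoding="utf-8") as f:
    results = [json.loads(line) for line in f]

print(len(results))                                   # one entry per task
print(results[0]["task_id"], results[0]["output"][:80])  # first answer, truncated
```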