---
library_name: transformers
tags:
- unsloth
license: apache-2.0
datasets:
- llm-jp/magpie-sft-v1.0
language:
- ja
base_model:
- google/gemma-2-9b
---
### Uploaded model
- **Developed by:** Hizaneko
- **License:** apache-2.0
- **Finetuned from model:** google/gemma-2-9b
## Uses
The following code generates outputs for ELYZA-tasks-100-TV using the model uploaded to Hugging Face. It is intended to run on Google Colab.
```python
%%capture
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install -U torch
!pip install -U peft
```
```python
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re

# Read the Hugging Face access token registered in Colab's "Secrets" (key: HF_TOKEN).
from google.colab import userdata
HF_TOKEN = userdata.get('HF_TOKEN')
```
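If you run this outside Google Colab, `google.colab.userdata` is not available. A hypothetical fallback, assuming the token is exported as an environment variable named `HF_TOKEN`, is:

```python
# Hypothetical fallback when not running on Colab:
# take the Hugging Face token from the HF_TOKEN environment variable.
import os
HF_TOKEN = os.environ["HF_TOKEN"]
```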
```python
# Base model and the trained LoRA adapter (specified by their Hugging Face IDs).
# Download the base model repository from Hugging Face.
!huggingface-cli login --token $HF_TOKEN
!huggingface-cli download google/gemma-2-9b --local-dir gemma-2-9b/

model_id = "./gemma-2-9b"
adapter_id = "Hizaneko/gemma-2-9b-nyan100"
```
```python
# Load the base model with unsloth's FastLanguageModel.
dtype = None          # None lets unsloth choose an appropriate dtype automatically
load_in_4bit = True   # 4-bit quantization so the 9B model fits in Colab GPU memory
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

# Attach the trained LoRA adapter to the base model.
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
```
```python
# Upload the task data (elyza-tasks-100-TV_0.jsonl) to the working directory beforehand.
datasets = []
with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
```
```python
# Switch the model into inference mode.
FastLanguageModel.for_inference(model)

results = []
for dt in tqdm(datasets):
    input = dt["input"]
    # Prompt template used at fine-tuning time: "### 指示" (instruction) / "### 回答" (answer),
    # with "簡潔に回答してください" ("please answer concisely") appended to the instruction.
    prompt = f"""### 指示\n{input} 簡潔に回答してください \n### 回答\n"""
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True, do_sample=False, repetition_penalty=1.2)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
    results.append({"task_id": dt["task_id"], "input": input, "output": prediction})
```
```python
# Save the results as a JSONL file.
json_file_id = re.sub(".*/", "", adapter_id)
with open(f"/content/{json_file_id}_output.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
```
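To verify the submission file, you can read it back and check that it contains one output per task. This is a small sketch; the path simply mirrors the output path written above.

```python
# Read the generated JSONL back and make sure every task produced an output.
with open(f"/content/{json_file_id}_output.jsonl", encoding="utf-8") as f:
    records = [json.loads(line) for line in f if line.strip()]
print(f"{len(records)} / {len(datasets)} tasks answered")
```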