---
base_model: llm-jp/llm-jp-3-13b
tags:
- text-generation-inference
- transformers
- unsloth
- llama
- trl
language:
- ja
license: cc-by-nc-sa-4.0
---
# Introduction
This is a model built for submission to the competition of the LLM course 2024 run by the Matsuo-Iwasawa Laboratory at the University of Tokyo.
It was created by applying SFT with QLoRA to llm-jp/llm-jp-3-13b; only the LoRA adapter is uploaded here.
The chat template is identical to the one used by weblab-GENIAC/Tanuki-8B-dpo-v1.0.
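For quick local experiments outside the competition environment, the adapter can also be attached with transformers + PEFT. The sketch below is only an illustration, assuming `peft` and `bitsandbytes` are installed; the evaluated inference path is the vLLM notebook in the next section.
```python
# Minimal sketch (assumes peft and bitsandbytes are installed): load the base
# model in 4-bit and attach the LoRA adapter from this repository.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "llm-jp/llm-jp-3-13b",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "OsakanaTeishoku/1204lora")

# Copy the chat template from Tanuki-8B-dpo-v1.0, as noted above
tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-3-13b")
tokenizer.chat_template = AutoTokenizer.from_pretrained(
    "weblab-GENIAC/Tanuki-8B-dpo-v1.0"
).chat_template
```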
# How to run inference
In the provided environment, run inference as follows, using vLLM on a single L4 GPU instance.
The code is written for a Jupyter Notebook: each chunk below corresponds to one cell. Run the cells in order.
```python
# Cell 1: pin numpy to 1.26.4
!pip uninstall numpy -y
!pip install numpy==1.26.4

%%time
# Cell 2: install vLLM and ipywidgets (%%time reports the install time)
%pip install vllm==0.6.4.post1 --force-reinstall
!pip install ipywidgets

import time
import torch
#import pandas as pd
#import polars as pl
import transformers
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
#GemmaTokenizerFast,
#AutoModelForSequenceClassification,
BitsAndBytesConfig
)
import vllm  ### NOTE: packaging must be ==24.1, otherwise this errors out ###
from vllm.lora.request import LoRARequest
from jinja2 import Template
print(vllm.__version__)
MAX_LENGTH = 1024
MODEL_NAME = "llm-jp/llm-jp-3-13b"
print(MODEL_NAME)

import os
os.environ["HF_TOKEN"] = "your Hugging Face token"  # replace with your own token
# Load the base model with vLLM: 4-bit bitsandbytes quantization, LoRA enabled
llm = vllm.LLM(
    MODEL_NAME,
    tensor_parallel_size=1,  # 2, 4
    gpu_memory_utilization=0.95,
    trust_remote_code=True,
    enforce_eager=True,
    max_model_len=MAX_LENGTH,
    enable_lora=True,
    quantization="bitsandbytes",
    load_format="bitsandbytes"
)
tokenizer = llm.get_tokenizer()

# Overwrite the chat template with the one from Tanuki-8B-dpo-v1.0
sft_tokenizer = AutoTokenizer.from_pretrained(
    "weblab-GENIAC/Tanuki-8B-dpo-v1.0"
)
tokenizer.chat_template = sft_tokenizer.chat_template

# Download the LoRA adapter from this repository
from huggingface_hub import snapshot_download
lora_path = snapshot_download(repo_id="OsakanaTeishoku/1204lora")
from datasets import load_dataset
# Path to the evaluation jsonl file; replace it with your actual file name if it differs
data_files = {"test": "elyza-tasks-100-TV_0.jsonl"}
# Load the tasks with load_dataset
tasks = load_dataset("json", data_files=data_files, split="test")

# Build one user message per task and render prompt token ids with the chat template
messages_list = [
    [{"role": "user", "content": tasks["input"][i]}] for i in range(len(tasks))
]
prompts = [line[0]["content"] for line in messages_list]
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]
# Sampling parameters for generation
sampling_params = vllm.SamplingParams(
    temperature=0.7,
    max_tokens=1024,
    repetition_penalty=1.05,
    top_p=0.9,
)
# Generate with the LoRA adapter applied
outputs = llm.generate(
    prompt_token_ids=prompt_token_ids,
    sampling_params=sampling_params,
    lora_request=LoRARequest("lora", 1, lora_path),  # LoRA adapter
)
for prompt, response in zip(prompts, outputs):
    print("prompt:", prompt)
    print("output:", response.outputs[0].text.strip())
    print("-" * 80)
import json

# Collect the outputs in the submission format
data = [{
    "task_id": i,
    #"input": prompts[i],
    "output": outputs[i].outputs[0].text.strip()
} for i in range(len(tasks))]

# Write one JSON object per line (UTF-8, keep non-ASCII characters)
file_path_with_unicode = 'output.jsonl'
with open(file_path_with_unicode, 'w', encoding='utf-8') as file:
    for entry in data:
        json.dump(entry, file, ensure_ascii=False)
        file.write('\n')
print(f"Saved json {file_path_with_unicode} !")
```
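After the last cell runs, `output.jsonl` should contain one JSON object per line with `task_id` and `output` fields. As an optional sanity check (a small sketch, not part of the notebook above), the file can be read back like this:
```python
import json

# Read output.jsonl back and confirm one JSON object per line with the expected keys
with open("output.jsonl", encoding="utf-8") as f:
    rows = [json.loads(line) for line in f]
print(len(rows), "rows; keys:", sorted(rows[0].keys()))
```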
# Uploaded model
- **Developed by:** OsakanaTeishoku
- **License:** cc-by-nc-sa-4.0
- **Finetuned from model:** llm-jp/llm-jp-3-13b

This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)