---
datasets:
- hiroki-rad/elyza_generated_data-3031
language:
- ja
base_model:
- llm-jp/llm-jp-3-13b
---
|
```python |
|
# Install runtime dependencies quietly (IPython shell magic).
# NOTE(review): langchain-huggingface appears twice in this list — harmless
# but redundant; also `--q` relies on pip's option-prefix abbreviation of --quiet.
!pip install -U langchain-community langchain-huggingface vllm triton wandb weave langchain-huggingface langchain-chroma datasets --q

# Clone the project repository from GitHub and change into it.
!git clone https://github.com/y-hiroki-radiotech/llm-final-task.git
%cd llm-final-task
|
|
|
# Standard library
import json
import os
import random
import time

# Third-party
import numpy as np
import pandas as pd
import torch
# FIX: the original line read `from datasets import` with no names, which is a
# SyntaxError; `load_dataset` is what the script calls below.
from datasets import load_dataset
from tqdm import tqdm
from vllm import LLM

# Project-local (from the cloned llm-final-task repository)
from custom_few_shot_prompt_template import CustomFewShotPromptTemplate
|
|
|
|
|
# Load the evaluation tasks (ELYZA-tasks-100-TV) from a local JSONL file.
file_path = 'elyza-tasks-100-TV_0.jsonl'
data = pd.read_json(file_path, lines=True)

# Build the example pool for the few-shot example selector from the public
# ELYZA-tasks-100 test split.
df = load_dataset("elyza/ELYZA-tasks-100", split="test")
df = df.to_pandas()
examples = [
    {"input": row.input, "output": row.output}
    for row in df.itertuples()
]
|
|
|
|
|
# Few-shot example selector: pre-render one prompt per evaluation task.
few_shot = CustomFewShotPromptTemplate(examples)
few_shot_list = [
    few_shot.format(row.input)
    for row in tqdm(data.itertuples(), desc="生成中")
]
|
|
|
# Release cached GPU memory once before the vLLM engine allocates its own.
if not torch.cuda.is_available():
    print("CUDA is not available on this system.")
else:
    print("Clearing CUDA cache...")
    torch.cuda.empty_cache()
    print("CUDA cache cleared.")
|
|
|
# Load the fine-tuned checkpoint with vLLM for fast batched inference.
# NOTE(review): this downloads the model weights from the Hugging Face Hub
# on first run and requires a CUDA-capable GPU.
model_name = "hiroki-rad/llm-jp-llm-jp-3-13b-128-ft-3000"
llm = LLM(model=model_name)
|
|
|
# Two-stage ("think twice") inference helper from the cloned repository.
from two_stage_think import TwoStageThinking

thinking_generator = TwoStageThinking(llm)

# In the end only the first-pass answer is kept for each task.
# (Loop variable renamed from `few_shot` to `prompt` so it no longer shadows
# the CustomFewShotPromptTemplate instance defined above.)
results = []
for row, prompt in tqdm(zip(data.itertuples(), few_shot_list), desc="生成中"):
    time.sleep(3)
    results.append(thinking_generator.generate_complete_response(row, prompt))
|
|
|
# Pair each task_id with its generated output as one record per task.
jsonl_data = []
for idx in range(len(data)):
    jsonl_data.append({
        "task_id": data.iloc[idx]["task_id"],
        "output": results[idx],
    })
|
|
|
# Write the submission file: one JSON object per line, UTF-8, Japanese kept
# unescaped via ensure_ascii=False.
with open("llm-jp-vllm-second-thinking-output.jsonl", "w", encoding="utf-8") as outfile:
    for entry in jsonl_data:
        # Convert task_id to a built-in int before serializing (json cannot
        # handle the pandas scalar type directly).
        entry["task_id"] = int(entry["task_id"])
        outfile.write(json.dumps(entry, ensure_ascii=False))
        outfile.write('\n')
|
``` |