How to Use

# Install the required libraries
!pip install -U langchain-community langchain-huggingface vllm triton fugashi unidic-lite
# Imports
import json
import pandas as pd
from tqdm import tqdm
from transformers import pipeline
from langchain_community.llms import VLLM
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser


# Clone the GitHub repository that provides the task-specific prompts
!git clone https://github.com/y-hiroki-radiotech/llm-final-task.git
%cd llm-final-task

# Instantiate PromptStock, which holds a prompt tailored to each task type
from prompt import PromptStock
prompt_stock = PromptStock()

# Load the evaluation data as a pandas DataFrame
file_path = 'elyza-tasks-100-TV_0.jsonl' # path to the JSONL input file
data = pd.read_json(file_path, lines=True)

# Assign a task label to each input row. Inputs are classified into one of
# 8 task types by a fine-tuned BERT classifier.
model_name = "hiroki-rad/bert-base-classification-ft"
classify_pipe = pipeline(model=model_name, device="cuda:0")

# The pipeline returns a list of {"label": ..., "score": ...} dicts per call;
# we keep only the top prediction's label name, so the element type is str
# (the previous annotation list[dict[str, float | str]] was inaccurate).
results: list[str] = [
    classify_pipe(example.input)[0]["label"]
    for example in data.itertuples()
]

data["label"] = results

# Load the answer-generation model through vLLM, using AWQ-quantized weights.
model_name = "hiroki-rad/llm-jp-llm-jp-3-13b-16-ft"
llm = VLLM(model=model_name, quantization="awq")

# Prompt template: the task-specific instructions fill {context} and the
# user's question fills {input}.
template = """
ユーザー: 質問を良く読んで、適切な回答をしてください。
{context}
質問:{input}
回答:"""

prompt = PromptTemplate(
    template_format="f-string",
    input_variables=["context", "input"],
    template=template,
)

# Compose the runnable pipeline: prompt -> LLM -> plain string output.
vllm_chain = prompt | llm
chain = RunnablePassthrough() | vllm_chain | StrOutputParser()

# Run the chain over every row, pairing each input with the prompt selected
# for its predicted task label; collect the generated answers in order.
outputs = []
for row in tqdm(data.itertuples(),
                total=len(data),
                desc="Processing rows",
                position=0,
                leave=True):
    context_prompt = prompt_stock.get_prompt(row.label)
    answer = chain.invoke({
        "context": context_prompt,
        "input": row.input,
    })
    outputs.append(answer)

# Write the results as JSONL: one {"task_id": ..., "output": ...} object
# per line. task_id is coerced to a plain int up front (pandas yields
# numpy integers, which json.dump cannot serialize directly).
jsonl_data = [
    {"task_id": int(task_id), "output": output}
    for task_id, output in zip(data["task_id"], outputs)
]

# Explicit UTF-8 encoding avoids platform-dependent defaults, and
# ensure_ascii=False keeps the Japanese text human-readable in the file.
with open("output.jsonl", "w", encoding="utf-8") as outfile:
    for entry in jsonl_data:
        json.dump(entry, outfile, ensure_ascii=False)
        outfile.write("\n")
Downloads last month
32
Safetensors
Model size
13.7B params
Tensor type
BF16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for hiroki-rad/llm-jp-llm-jp-3-13b-16-ft

Finetuned
(1120)
this model

Dataset used to train hiroki-rad/llm-jp-llm-jp-3-13b-16-ft