In [1]:
!pip install transformers accelerate bitsandbytes sentencepiece einops

Collecting transformers
  Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.22.0-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.2/251.2 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Repository ID handed to AutoTokenizer.from_pretrained in the tokenizer cell.
TOKENIZER_ID: str = "AIBunCho/japanese-novel-gpt-j-6b"
# Repository ID handed to AutoModelForCausalLM.from_pretrained in the model cell.
# NOTE(review): presumably a fine-tune that shares the vocabulary of TOKENIZER_ID
# (inferred from the repo names) — confirm before changing either ID.
MODEL_ID: str = "tsukemono/japanese-novel-gpt-j-6b-f16-marisa"

In [3]:
# Tokenizer setup
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID, use_fast=True)

# IDs of two tokens the generation step treats specially:
#   ret_token - "[SEP]", used as an extra end-of-sequence marker
#   bra_token - the full-width opening parenthesis, banned from the output
# Each string may be split into several pieces, so the last piece's ID is taken.
# `truncation=True` is intentionally not passed: with no max_length it does
# nothing except log a "no maximum length is provided" warning.
ret_token = tokenizer("[SEP]", add_special_tokens=False)["input_ids"][-1]
bra_token = tokenizer("（", add_special_tokens=False)["input_ids"][-1]

Downloading (…)okenizer_config.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/834k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [6]:
# Model setup: load the checkpoint with float16 weights, not 8-bit quantized,
# and let device_map="auto" decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    load_in_8bit=False,
    device_map="auto",
)
# Switch to evaluation mode; as the cell's last expression this also displays
# the module structure.
model.eval()

Downloading (…)lve/main/config.json:   0%|          | 0.00/951 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/21.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/2.16G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): Embedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (out_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): GPTJMLP(
          (fc_in): Linear(in_features=4096, out_features=16384, bias=True)
          (fc_out): Linear(in_features=16384, out_features=4096, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f)

In [7]:
# Define the text-generation function
def generate(text, input=None, maxTokens=512):
    """Sample one reply from the model for a single user utterance.

    The utterance is wrapped in the prompt template
    ``ユーザー: {text}[SEP]魔理沙: `` and the model continues it until it
    emits the tokenizer's EOS token or the ``[SEP]`` token (``ret_token``).
    Sampling is stochastic (``do_sample=True``), so repeated calls with the
    same text can return different replies.

    Args:
        text: User utterance inserted into the prompt template.
        input: Unused. Kept only so callers that already pass it keep working.
        maxTokens: Passed to ``model.generate`` as ``max_length``, i.e. the
            cap on prompt tokens plus generated tokens (not on new tokens only).

    Returns:
        str: The decoded continuation, with the prompt tokens removed and
        special tokens skipped.
    """
    prompt = f"ユーザー: {text}[SEP]魔理沙: "

    # No `truncation=True`: without a max_length it is a no-op that only
    # triggers a warning on every call.
    encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)

    # The model is loaded with device_map="auto", so follow the model's device
    # instead of hard-coding .cuda().
    device = model.device
    input_ids = encoded.input_ids.to(device)
    # Pass the mask explicitly when the tokenizer provides one, so generate()
    # does not have to infer it from pad/eos IDs.
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    # Fall back to EOS when the tokenizer defines no pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        generated = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=maxTokens,
            do_sample=True,
            temperature=0.1,
            top_p=0.9,
            top_k=20,
            no_repeat_ngram_size=2,
            repetition_penalty=1.15,
            pad_token_id=pad_token_id,
            # Never emit the full-width opening parenthesis.
            bad_words_ids=[[bra_token]],
            # Stop on either the regular EOS token or "[SEP]".
            eos_token_id=[tokenizer.eos_token_id, ret_token],
        )

    # Keep only the newly generated tokens of the single returned sequence.
    prompt_length = input_ids.size(1)
    reply_ids = generated[0, prompt_length:].tolist()
    return tokenizer.decode(reply_ids, skip_special_tokens=True)

In [8]:
# Prompt (Japanese): "What kind of place is Gensokyo?"
generate("幻想郷ってどんな場所?")

'そうだな。一言で言えば、幻想の世界だ! '

In [16]:
# Prompt (Japanese): "What do you think about Buddhism?"
generate("仏教についてどう思う?")

'そうだな。私は、宗教はよく分からないが......ただ、神様ってのは本当にいると思うぜ。 '

In [10]:
# Prompt (Japanese): "What is the highest mountain in Japan?"
generate("日本で一番高い山は?")

'富士山だ!'

In [17]:
# Prompt (Japanese): "Please introduce yourself."
generate("自己紹介してください")

'霧雨魔理沙だ。普通の魔法使いだぜ! '