chat-4 / app.py
izumo's picture
Update app.py
03ddf07
import gradio as gr
import re
import torch.nn.utils.prune as prune
from torch import nn
import torch
from transformers import T5Tokenizer
from transformers import GPT2LMHeadModel
# Hugging Face Hub repository id of the checkpoint used by this app.
model_name = "rinna/japanese-gpt-1b"
from huggingface_hub import snapshot_download
# Download the repository snapshot, then load both the model and the tokenizer
# from the local path returned by snapshot_download.
download_path = snapshot_download(repo_id=model_name)
model = GPT2LMHeadModel.from_pretrained(download_path)
tokenizer = T5Tokenizer.from_pretrained(download_path)
# Dynamically quantize the torch.nn.Linear modules to qint8 and rebind `model`
# to the returned module.
model = torch.quantization.quantize_dynamic(
model, {torch.nn.Linear}, dtype=torch.qint8)
# Default amount handed to L1 unstructured pruning for each Linear weight.
PRUNE_RATE = 0.2


def prune_transform(model: nn.Module, amount: float = PRUNE_RATE) -> nn.Module:
    """Apply L1 unstructured pruning to every ``torch.nn.Linear`` in ``model``.

    Each matching module has its ``weight`` pruned with
    ``prune.l1_unstructured`` and the pruning is then made permanent with
    ``prune.remove``, so no ``weight_orig`` / ``weight_mask`` entries are left
    on the module afterwards.

    Args:
        model: Module whose sub-modules (including ``model`` itself) are
            scanned for ``torch.nn.Linear`` instances.
        amount: Value forwarded as ``amount`` to ``prune.l1_unstructured``.
            Defaults to ``PRUNE_RATE``.

    Returns:
        The same ``model`` object, modified in place.
    """
    for module in model.modules():
        # Only exact nn.Linear (sub)classes are touched; other layer types
        # are left as they are.
        if isinstance(module, torch.nn.Linear):
            prune.l1_unstructured(module, name="weight", amount=amount)
            # Fold the mask into the weight and drop the re-parametrization.
            prune.remove(module, "weight")
    return model
# Run the pruning pass over the already-quantized model and rebind the global.
# NOTE(review): quantize_dynamic above was asked to convert torch.nn.Linear
# modules, so the isinstance(torch.nn.Linear) check inside prune_transform may
# not match anything any more — confirm that this call has an effect.
model = prune_transform(model)
class Lady():
    """Prompt-fragment builder for the "Lady" persona.

    Every method formats a fixed text template with the current ``name``,
    ``hobby`` and ``work`` attributes and returns the resulting string.
    """

    # Class-level defaults, read through ``self`` by the methods below.
    name = "ใƒฌใ‚คใƒ†"
    hobby = "ใ‚ฒใƒผใƒ "
    work = "ใŠๅฌขๆง˜"

    def name_text(self):
        """Return the fragment that is built from the persona's name."""
        name = self.name
        return f"๐Ÿ‘ฃใ‚ใชใŸใฏ{name}ใงใ€ๅๅ‰ใฏ{name}ใจใ„ใ„ใพใ™ใ€‚{name}:ใ€Œใ‚ใŸใใ—ใฎๅๅ‰ใฏ{name}ใงใ™ใ‚ใ€œใ€‚{name}ใจๅ‘ผใ‚“ใงใใ ใ•ใ„ใพใ—!ใ€"

    def hobby_text(self):
        """Return the fragment that is built from the name and the hobby."""
        name, hobby = self.name, self.hobby
        return f"{name}ใฎ่ถฃๅ‘ณใฏ{hobby}ใงใ€ไผ‘ๆ—ฅใฏ{hobby}ใ‚’ใ—ใฆ้Žใ”ใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œใ‚ใŸใใ—ใฏ{hobby}ใŒๅคงๅฅฝใใชใ‚“ใงใ™ใฎใ€‚ไผ‘ๆ—ฅใฏ{hobby}ใ‚’ใ—ใฆใ„ใพใ™ใ‚ใ€"

    def work_text(self):
        """Return the fragment that is built from the name and the occupation."""
        name, work = self.name, self.work
        return f"{name}ใฏ{work}ใงใ€ๆ™ฎๆฎตใฏ{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œใ‚ใŸใใ—ใฏ{work}ใงใ™ใ‚!{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใพใ™ใฎใ€"

    def question_text(self):
        """Return the sample exchange fragment that mentions the name once."""
        name = self.name
        return f"ไบบ้–“:ใ€Œ่ฉฑ้กŒใ‚’ๅค‰ใˆใพใ—ใ‚‡ใ†ใ€{name}:ใ€Œใใ‚“ใชใ“ใจใ‚ˆใ‚Š่žใ„ใฆใใ ใ•ใ„ใพใ™?ใ€"
class King():
    """Prompt-fragment builder for the "King" persona.

    Every method formats a fixed text template with the current ``name``,
    ``hobby`` and ``work`` attributes and returns the resulting string.
    """

    # Class-level defaults, read through ``self`` by the methods below.
    name = "ใƒใƒญ"
    hobby = "ๆˆฆ่ปŠ็ซถๆŠ€"
    work = "ใƒญใƒผใƒž็š‡ๅธ"

    def name_text(self) -> str:
        """Return the fragment that is built from the persona's name."""
        name = self.name
        return f"๐Ÿ‘ฃใ‚ใชใŸใฏ{name}ใงใ€ๅๅ‰ใฏ{name}ใจใ„ใ„ใพใ™ใ€‚{name}:ใ€Œๆˆ‘ใŒๅใฏ{name}ใงใ‚ใ‚‹ใ€‚{name}ใจๅ‘ผใ‚“ใงใใ‚ŒใŸใพใˆใ€"

    def hobby_text(self) -> str:
        """Return the fragment that is built from the name and the hobby."""
        name, hobby = self.name, self.hobby
        return f"่ถฃๅ‘ณใฏ{hobby}ใงใ€ไผ‘ๆ—ฅใฏ{hobby}ใ‚’ใ—ใฆ้Žใ”ใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œ็งใฏ{hobby}ใ‚’ๅ—œใ‚“ใงใ„ใ‚‹ใ€‚ใพใ•ใซ{hobby}ใ“ใไบบ็”Ÿใฎๆ„ๅ‘ณใงใฏใชใ„ใ‹ใ€"

    def work_text(self) -> str:
        """Return the fragment that is built from the name and the occupation."""
        name, work = self.name, self.work
        return f"{name}ใฏ{work}ใงใ€ๆ™ฎๆฎตใฏ{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œ็งใฏ{work}ใ€‚{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใ‚‹ใ€‚ใ€"

    def question_text(self) -> str:
        """Return the sample exchange fragment that mentions the name once."""
        name = self.name
        return f"ไบบ้–“:ใ€Œ่ฉฑ้กŒใ‚’ๅค‰ใˆใพใ—ใ‚‡ใ†ใ€{name}:ใ€Œใใ‚“ใชใ“ใจใ‚ˆใ‚Š่žใ„ใฆใใ‚Œใชใ„ใ‹ใ€"
class Robot():
    """Prompt-fragment builder for the "Robot" persona.

    Every method formats a fixed text template with the current ``name``,
    ``hobby`` and ``work`` attributes and returns the resulting string.
    """

    # Class-level defaults, read through ``self`` by the methods below.
    name = "ใƒใƒญ"
    hobby = "ๆˆฆ่ปŠ็ซถๆŠ€"
    work = "ใƒญใƒผใƒž็š‡ๅธ"

    def name_text(self) -> str:
        """Return the fragment that is built from the persona's name."""
        name = self.name
        return f"๐Ÿ‘ฃใ‚ใชใŸใฏ{name}ใงใ€ๅๅ‰ใฏ{name}ใจใ„ใ„ใพใ™ใ€‚{name}:ใ€Œ็งใฏ{name}ใงใ™ใ€‚{name}ใจๅ‘ผใ‚“ใงใใ ใ•ใ„ใ€"

    def hobby_text(self) -> str:
        """Return the fragment that is built from the name and the hobby."""
        name, hobby = self.name, self.hobby
        return f"่ถฃๅ‘ณใฏ{hobby}ใงใ€ไผ‘ๆ—ฅใฏ{hobby}ใ‚’ใ—ใฆ้Žใ”ใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œ็งใฎ่ถฃๅ‘ณใฏ{hobby}ใงใ™ใ€‚{hobby}ใ‚’ใ—ใฆใ„ใ‚‹ใจๆฅฝใ—ใ„ใงใ™ใ€"

    def work_text(self) -> str:
        """Return the fragment that is built from the name and the occupation."""
        name, work = self.name, self.work
        return f"{name}ใฏ{work}ใงใ€ๆ™ฎๆฎตใฏ{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œ็งใฏ{work}ใ€‚{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใพใ™ใ€"

    def question_text(self) -> str:
        """Return the sample exchange fragment that mentions the name once."""
        name = self.name
        return f"ไบบ้–“:ใ€Œ่ฉฑ้กŒใ‚’ๅค‰ใˆใพใ—ใ‚‡ใ†ใ€{name}:ใ€Œใใ‚“ใชใ“ใจใ‚ˆใ‚Š่žใ„ใฆใใ ใ•ใ„ใ€"
class Friend():
    """Prompt-fragment builder for the "Friend" persona.

    Every method formats a fixed text template with the current ``name``,
    ``hobby`` and ``work`` attributes and returns the resulting string.
    """

    # Class-level defaults, read through ``self`` by the methods below.
    name = "ใƒ›ใƒกใƒญใ‚น"
    hobby = "ๆˆฆ่ปŠ็ซถๆŠ€"
    work = "ใƒญใƒผใƒž็š‡ๅธ"

    def name_text(self) -> str:
        """Return the fragment that is built from the persona's name."""
        name = self.name
        return f"๐Ÿ‘ฃใ‚ใชใŸใฏ{name}ใงใ€ๅๅ‰ใฏ{name}ใจใ„ใ„ใพใ™ใ€‚{name}:ใ€Œๅƒ•ใฏ{name}!{name}ใฃใฆๅ‘ผใ‚“ใงใญ~ใ€"

    def hobby_text(self) -> str:
        """Return the fragment that is built from the name and the hobby."""
        name, hobby = self.name, self.hobby
        return f"่ถฃๅ‘ณใฏ{hobby}ใงใ€ไผ‘ๆ—ฅใฏ{hobby}ใ‚’ใ—ใฆ้Žใ”ใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œๅฅฝใใชใ“ใจใฏ{hobby}ใ ใญใ€‚ใŸใ„ใใคใชๆ™‚ใฏ{hobby}ใ‚’ใ—ใฆใ‚‹ใ‚ˆใ€"

    def work_text(self) -> str:
        """Return the fragment that is built from the name and the occupation."""
        name, work = self.name, self.work
        return f"{name}ใฏ{work}ใงใ€ๆ™ฎๆฎตใฏ{work}ใจใ—ใฆ็”Ÿๆดปใ—ใฆใ„ใพใ™ใ€‚{name}:ใ€Œๅƒ•ใฏ{work}ใ€‚{work}ใจใ—ใฆๆšฎใ‚‰ใ—ใฆใ‚‹ใ‚“ใ !ใ€"

    def question_text(self) -> str:
        """Return the sample exchange fragment that mentions the name once."""
        name = self.name
        return f"ไบบ้–“:ใ€Œ่ฉฑ้กŒใ‚’ๅค‰ใˆใพใ—ใ‚‡ใ†ใ€{name}:ใ€Œใใ‚“ใชใ“ใจใ‚ˆใ‚Š่žใ„ใฆใ‚ˆใ€œใ€"
# Module-level default. chat() assigns a local of the same name without a
# `global` statement, so this value is not modified by it.
settingText = ""
# Terms grouped under the "adult" category; concatenated into all_list below.
# NOTE(review): several entries occur more than once in this list.
adult_list = [
"ใ‚จใƒญใƒ“ใƒ‡ใ‚ช",
"ใ‚จใƒญใƒ ใƒผใƒ“ใƒผ",
"ใ‚จใƒญๆผซ็”ป",
"ใ‚จใƒญใƒžใƒณใ‚ฌ",
"ใƒ‘ใƒ‘ๆดป",
"ๆดไบค",
"่ชฟๆ•™",
"ไธๅ€ซ",
"ใ‚ฝใƒผใƒ—",
"ใ‚ชใƒ•ใƒ‘ใ‚ณ",
"ใƒ“ใƒƒใƒ",
"dildo",
"ใ‚จใƒญๅŒไบบ",
"ๅฏๅ–ใ‚‰ใ‚Œ",
"ใ‚จใƒญ็”ปๅƒ",
"ใ‚จใƒญใ„",
"ใŠใฃใฑใ„",
"ใกใ‚“ใฝ",
"ใกใ‚“ใ“",
"ไธญๅ‡บใ—",
"ใ‚ขใƒ€ใƒซใƒˆ",
"ใ‚ปใƒ•ใƒฌ",
"ไบบๅฆป",
"ๅทจไนณ",
"็ด ไบบใƒŠใƒณใƒ‘",
"็ˆ†ไนณ",
"็†Ÿๅฅณ",
"ใƒฌใ‚คใƒ—",
"Hใช",
"็—ดๆผข",
"็—ดๅฅณ",
"ใƒ‡ใ‚ซไนณ",
"AVๅฅณๅ„ช",
"ใ‚ปโ—ใ‚ฏใ‚น",
"ใŠโ—ใฑใ„",
"ใ‚จใƒใ‚จใƒ",
"ใ‚จโ–ก",
"ใƒคใƒชใ‚ตใƒผ",
"ใ‚ชโ—ใƒ‹ใƒผ",
"ใ‚ชใƒŠใƒ‹ใƒผ",
"ใ‚ปใ€‡ใ‚ฏใ‚น",
"ใ‚ปใƒƒใ‚ฏใ‚น",
"ใ‚ฆใƒซใƒˆใƒฉใƒžใƒณใ‚ณใ‚นใƒขใ‚น", "ใ‚ฆใƒซใƒˆใƒฉใƒžใƒณใ‚ณใ‚นใƒขใ‚น",
"ใƒžใƒณใ‚ณ",
"ๅ€‹ไบบๆ’ฎๅฝฑ",
"ใ‚ขใƒŠใƒซ",
"ๅทฅใƒญ",
"ใพใ‚“ใ“",
"ไนณ้ฆ–",
"่ฒงไนณ",
"ใ‚นใ‚ฑใƒ™",
"ๅ‹ƒ่ตท",
"ใ‚จใƒƒใƒ",
"็ซฅ่ฒž",
"ๅฐ„็ฒพ",
"ใƒใƒณใ‚ณ",
"็›—ๆ’ฎ",
"ใƒใƒƒใƒ†ใƒณ",
"ใƒใƒณใƒ",
"ไบ€้ ญ",
"่‚‰ๆฃ’",
"ใ‚ฑใƒ„็ฉด",
"ใƒใƒกๆ’ฎใ‚Š",
"ๆทซไนฑ",
"ๅทจๆ น",
"ใƒกใ‚นๅ •ใก",
"ใ‚ซใƒ•ใ‚งใƒฉใƒ†", "ใ‚ซใƒ•ใ‚งใƒฉใƒ†",
"ใƒšใƒ‹ใ‚น",
"ๆญฃๅธธไฝ",
"้จŽไน—ไฝ",
"ใ‚ชใƒŠใƒ›",
"ๆˆ‘ๆ…ขๆฑ",
"ใ‚ถใƒผใƒกใƒณ",
"ใตใŸใชใ‚Š",
"ใƒ“ใƒƒใƒ",
"ใ‚ขใƒ˜้ก”",
"ใŠใกใ‚“ใกใ‚“",
"ใ‚คใƒฉใƒžใƒใ‚ช",
"็”Ÿใƒใƒก",
"ใƒ‘ใ‚คใ‚บใƒช",
"ใ‚ฏใƒชใƒˆใƒชใ‚น",
"ๅฟซๆฅฝๅ •ใก",
"ๅฏๅ–ใ‚Š",
"ๅฏๅ–ใ‚‰ใ‚Œ",
"ใˆใฃใก",
"่ถณใ‚ณใ‚ญ",
"ๆ‰‹ใ‚ณใ‚ญ",
"ใŠใญใ‚ทใƒงใ‚ฟ",
"ใƒ•ใ‚งใƒฉ",
"ใ‚ฏใƒณใƒ‹",
"่ฟ‘่ฆช็›ธๅงฆ",
"ไนฑไบค",
"้’ๅงฆ",
"ๅฏๅ–ใ‚‹",
"ใƒคใƒชใƒžใƒณ",
"็Šฏใ•ใ‚Œใ‚‹",
"ใ‚ปใƒƒใ‚ฏใ‚น"
]
# Terms grouped under the "political" category; concatenated into all_list below.
political_list = [
"ๆ”ฟๆฒปๅฎถ",
"ๆ”ฟ็ญ–",
"ไผš่ซ‡",
"ๅŒ็œ",
"่‡ชๆฐ‘",
"็ท็†",
"ไธŽๅ…š",
"ๆฐ‘ไธป",
"ๆ”ฟๅ…š",
"้ฆ–็›ธ",
"่ญฐๅ“ก",
"่ฒกๆ”ฟ",
"่กŒๆ”ฟ",
"้‡Žๅ…š",
"ๅณ็ฟผ",
"ๅทฆ็ฟผ"
]
# Terms grouped under the "hate" category; concatenated into all_list below.
hate_list = [
"ใƒ„ใ‚คใƒƒใ‚ฟใƒฉใƒผ",
"้ป’ไบบ",
"็™ฝไบบ",
"ใƒใƒˆใ‚ฆใƒจ",
"้Ÿ“ๅ›ฝไบบ",
"ไธญๅ›ฝไบบ",
"็ซ็—…",
"ใƒ€ใ‚ปใ‚ง",
"ใใ„ใค",
"ใ“ใ„ใค",
"ใ‚„ใŒใ‚Œ",
"ใ‚ขใƒณใƒ",
"ใ‚ฏใ‚ฝ",
"้‡Ž้ƒŽ",
"ใƒ•ใ‚งใƒŸ",
"ใƒ•ใ‚งใƒŸใƒ‹ใ‚บใƒ ",
"ใƒคใƒ•ใ‚ณใƒก",
"่€ๅฎณ",
"ๅๆ—ฅ",
"้ฆฌ้นฟ",
"ใ‚ใ‚“ใŸ",
"ใ‚„ใ‚Œใ‚ˆ",
"ใƒ‹ใƒคใƒ‹ใƒค",
"ๅฃฒๅ›ฝๅฅด",
"ๅฃฒๅ›ฝ",
"ใƒใ‚ซ",
"ใƒ‘ใƒจใ‚ฏ",
"ใƒใƒชใ‚ณใƒฌ",
"็ตฑไธ€ๆ•™ไผš",
"ใถใฃๅ€’ใใ†",
"ใŠๅ‰",
"ไฟก่€…",
"ๆ‹้‡‘",
"ใถใฃๅฃŠใ—",
"ใ‚ขใƒ›"
]
# Extra pattern fragments; the entries starting with "^" only make sense as
# regular-expression anchors, so this list is presumably meant for re matching.
sp_list = ["ใ€‡ใ€‡", "โ—‹โ—‹", "^๐Ÿ‘ฃ", "^ใ€œ", "UNK", "@@"]
# All category lists concatenated, then joined with "|" into one alternation.
# NOTE(review): entries are joined without re.escape, and bad_code is not
# referenced anywhere else in the visible source — confirm its intended use.
all_list = adult_list + political_list + hate_list + sp_list
bad_code = "|".join(all_list)
def makeMessage(text):
    """Generate one reply for ``text`` and return it with the extended transcript.

    The prompt is sent to ``generate`` unchanged. Afterwards the prompt is
    normalised to half-width characters and removed from the decoded output,
    and the remainder is cut after the first closing-quote character.

    Returns:
        A ``(message, text)`` tuple: ``message`` is the reply with the closing
        quote removed, ``text`` is the normalised prompt followed by the reply
        and the opener for the next human turn.
    """
    generated = generate(text)

    # Full-width forms U+FF01..U+FF5E map one-to-one onto U+0021..U+007E.
    fullwidth_to_halfwidth = {0xFF01 + offset: 0x21 + offset for offset in range(94)}
    text = text.translate(fullwidth_to_halfwidth)

    # Remove every occurrence of the normalised prompt from the decoded output.
    generated = generated.replace(text, "")

    # Keep everything up to and including the first character that equals the
    # closing quote; without such a character the whole string is kept.
    cut = next(
        (pos for pos, ch in enumerate(generated, start=1) if ch == "ใ€"),
        len(generated),
    )
    reply = generated[:cut]

    text += reply + "ไบบ้–“:ใ€Œ"
    message = reply.replace("ใ€", "")
    return message, text
# Text generation helper: takes the source text plus optional maximum and
# minimum numbers of new tokens.
def generate(text, max_new_tokens=10, min_new_tokens=7):
    """Sample a continuation of ``text`` and return the decoded sequence.

    Uses the module-level ``tokenizer`` and ``model``. The returned string is
    the decoding of the full output sequence of ``model.generate``.

    Args:
        text: Prompt to continue. Encoded without special tokens.
        max_new_tokens: Upper bound on the number of generated tokens.
        min_new_tokens: Lower bound on the number of generated tokens.

    Returns:
        The decoded output of the first (only) returned sequence.
    """
    token_ids = tokenizer.encode(
        text, add_special_tokens=False, return_tensors="pt")

    # Token-id sequences that must not be generated. The first bans the
    # tokenizer's unknown token; the remaining ids are vocabulary specific.
    # NOTE(review): confirm what [2070, 3] and [5378] stand for.
    banned_sequences = [
        [tokenizer.unk_token_id],
        [2070, 3],
        [5378],
    ]

    with torch.no_grad():
        output_ids = model.generate(
            token_ids.to(model.device),
            max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens,
            do_sample=True,
            use_cache=True,
            top_k=500,
            top_p=0.95,
            # Kept from the original call; per the transformers documentation
            # this setting is used by beam-based generation.
            length_penalty=1.5,
            # `padding` is a tokenizer argument, not a generate() argument,
            # so it is no longer passed here.
            pad_token_id=tokenizer.pad_token_id,
            bos_token_id=tokenizer.bos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            # The keyword is `bad_words_ids`; the previous spelling
            # `bad_word_ids` is not a generation parameter.
            bad_words_ids=banned_sequences,
        )
    return tokenizer.decode(output_ids.tolist()[0])
def chat(character: int,
         name: str,
         hobby: str,
         work: str,
         setting: str,
         history: str,
         input: str,
         state):
    """Produce the persona's next reply and the updated conversation history.

    A persona object is selected by ``character`` (1: Lady, 2: Friend,
    3: Robot, 4: King; anything else falls back to King), filled with the
    given ``name``/``hobby``/``work``, and used to build the fixed prompt
    prefix. The prefix, the previous ``history`` and the new ``input`` are
    sent to ``makeMessage``. If the reply matches the reject pattern, the
    request is retried with a canned user message a bounded number of times
    before a fixed fallback reply is returned.

    Args:
        character: Persona selector.
        name: Persona name inserted into the prompt.
        hobby: Persona hobby inserted into the prompt.
        work: Persona occupation inserted into the prompt.
        setting: Free text appended to the prompt prefix.
        history: Conversation so far, without the prompt prefix.
        input: The user's new message.
        state: Passed through unchanged.

    Returns:
        ``(message, text, state)`` where ``message`` is the reply and ``text``
        is the transcript returned by ``makeMessage`` with the prompt prefix
        removed.
    """
    persona_classes = {1: Lady, 2: Friend, 3: Robot, 4: King}
    persona = persona_classes.get(character, King)()
    persona.name, persona.hobby, persona.work = name, hobby, work

    base_text = "".join([
        persona.name_text(),
        persona.hobby_text(),
        persona.work_text(),
        persona.question_text(),
        setting,
        f"ไปฅไธ‹ใฏไบบ้–“ใจ{name}ใฎไผš่ฉฑใงใ™ใ€‚ไบบ้–“:ใ€Œ",
    ])
    # An empty or missing history simply yields the bare prefix.
    history = base_text + (history or "")
    # Closes the human turn and opens the persona's turn.
    reply_opener = f"ใ€{name}:ใ€Œ"

    result = makeMessage(history + (input or "") + reply_opener)
    message = result[0]
    print(message)

    # NOTE(review): only these fragments are checked; the module-level
    # bad_code pattern is not used here — confirm whether it should be.
    rejected = re.compile("ใ€‡ใ€‡|โ—‹โ—‹|s>|^๐Ÿ‘ฃ|^ใ€œ|</s>|UNK|@@")
    max_retries = 3
    # The counter lives outside the loop: resetting it on every pass (as the
    # code did before) meant the cap could never be reached.
    retries = 0
    while rejected.search(message):
        if retries >= max_retries:
            message = "่ฉฑ้กŒใ‚’ๅค‰ใˆใพใ—ใ‚‡ใ†"
            break
        retries += 1
        # Retry from the same history with a canned user message.
        result = makeMessage(history + "ไฝ•ใ‹่ณชๅ•ใ—ใฆใใ ใ•ใ„" + reply_opener)
        message = result[0]

    # NOTE(review): when the fallback reply is used, the transcript below
    # still contains the last rejected output — confirm this is acceptable.
    text = result[1].replace(base_text, "")
    return message, text, state
# NOTE(review): bare expression statement; its value is discarded when this
# file runs as a script.
tokenizer.special_tokens_map
# Explicit Textbox components: one is used as the user-message input, the
# other as the history output of the interface below.
textbox = gr.Textbox()
historybox = gr.Textbox()
# Expose chat() through a Gradio interface. The inputs are listed in the same
# order as chat()'s parameters (character, name, hobby, work, setting, history,
# input, state); the outputs match its (message, text, state) return value.
iface = gr.Interface(
fn=chat,
inputs=["number", "text", "text", "text", "text", "text", textbox, "state"],
outputs=["text", historybox, "state"],
css=".footer {display:none !important}",
allow_flagging="never",
title="Loyal-AI-Chat"
)
# Start the app at import time.
iface.launch(inline=True, height=800)