Limour committed on
Commit
0523803
·
verified ·
1 Parent(s): d762eb5

Upload 11 files

Browse files
app.py CHANGED
@@ -1,10 +1,19 @@
1
  import subprocess
2
  import select
3
- from hf_api import restart_space
 
 
 
 
 
 
 
 
 
4
 
5
  try:
6
  # 启动另一个程序,并通过管道捕获其输出
7
- process = subprocess.Popen(["python", "sub_app.py"],
8
  stdout=subprocess.PIPE,
9
  stderr=subprocess.PIPE,
10
  bufsize=1, universal_newlines=True)
 
1
  import subprocess
2
  import select
3
+ import os
4
+ from mods.btn_reset import restart_space
5
+
6
# First-run bootstrap: when the GGUF model file is not present locally, fetch
# the model weights and the prompt-cache dataset from the Hugging Face Hub.
if not os.path.exists('downloads/causallm_7b.Q5_K_M.gguf'):
    from huggingface_hub import snapshot_download

    # makedirs(exist_ok=True) rather than mkdir: if an earlier start was
    # interrupted after the directories were created but before the model
    # finished downloading, mkdir would raise FileExistsError on every
    # following start and the download could never be retried.
    os.makedirs("downloads", exist_ok=True)
    os.makedirs("cache", exist_ok=True)
    snapshot_download(repo_id='TheBloke/CausalLM-7B-GGUF', local_dir=r'downloads',
                      allow_patterns='causallm_7b.Q5_K_M.gguf')
    snapshot_download(repo_id='Limour/llama-python-streamingllm-cache', repo_type='dataset', local_dir=r'cache')
13
 
14
  try:
15
  # 启动另一个程序,并通过管道捕获其输出
16
+ process = subprocess.Popen(["python", "gradio_streamingllm.py"],
17
  stdout=subprocess.PIPE,
18
  stderr=subprocess.PIPE,
19
  bufsize=1, universal_newlines=True)
gradio_streamingllm.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import threading
3
+ from llama_cpp_python_streamingllm import StreamingLLM
4
+ from mods.read_cfg import cfg
5
+
6
+ from mods.text_display import init as text_display_init
7
+
8
+ from mods.btn_rag import init as btn_rag_init
9
+
10
+ # ========== 按钮中用到的共同的函数 ==========
11
+ from mods.btn_com import init as btn_com_init
12
+
13
+ # ========== Submit the user message and stream the model reply ==========
14
+ from mods.btn_submit import init as btn_submit_init
15
+
16
+ # ========== 输出一段旁白 ==========
17
+ from mods.btn_vo import init as btn_vo_init
18
+
19
+ # ========== 给用户提供默认回复的建议 ==========
20
+ from mods.btn_suggest import init as btn_suggest_init
21
+
22
+ # ========== 重置按钮 ==========
23
+ from mods.btn_reset import init as btn_reset_init
24
+
25
+ # ========== 聊天的模版 默认 chatml ==========
26
+ from chat_template import ChatTemplate
27
+
28
+ # ========== 全局锁,确保只能进行一个会话 ==========
29
+ cfg['session_lock'] = threading.Lock()
30
+ cfg['session_active'] = False
31
+
32
+ # ========== 温度、采样之类的设置 ==========
33
+ with gr.Blocks() as setting:
34
+ with gr.Row():
35
+ cfg['setting_path'] = gr.Textbox(label="模型路径", max_lines=1, scale=2, **cfg['setting_path'])
36
+ cfg['setting_cache_path'] = gr.Textbox(label="缓存路径", max_lines=1, scale=2, **cfg['setting_cache_path'])
37
+ cfg['setting_seed'] = gr.Number(label="随机种子", scale=1, **cfg['setting_seed'])
38
+ cfg['setting_n_gpu_layers'] = gr.Number(label="n_gpu_layers", scale=1, **cfg['setting_n_gpu_layers'])
39
+ with gr.Row():
40
+ cfg['setting_ctx'] = gr.Number(label="上下文大小(Tokens)", **cfg['setting_ctx'])
41
+ cfg['setting_max_tokens'] = gr.Number(label="最大响应长度(Tokens)", interactive=True,
42
+ **cfg['setting_max_tokens'])
43
+ cfg['setting_n_keep'] = gr.Number(value=10, label="n_keep", interactive=False)
44
+ cfg['setting_n_discard'] = gr.Number(label="n_discard", interactive=True, **cfg['setting_n_discard'])
45
+ with gr.Row():
46
+ cfg['setting_temperature'] = gr.Number(label="温度", interactive=True, **cfg['setting_temperature'])
47
+ cfg['setting_repeat_penalty'] = gr.Number(label="重复惩罚", interactive=True, **cfg['setting_repeat_penalty'])
48
+ cfg['setting_frequency_penalty'] = gr.Number(label="频率惩罚", interactive=True,
49
+ **cfg['setting_frequency_penalty'])
50
+ cfg['setting_presence_penalty'] = gr.Number(label="存在惩罚", interactive=True,
51
+ **cfg['setting_presence_penalty'])
52
+ cfg['setting_repeat_last_n'] = gr.Number(label="惩罚范围", interactive=True, **cfg['setting_repeat_last_n'])
53
+ with gr.Row():
54
+ cfg['setting_top_k'] = gr.Number(label="Top-K", interactive=True, **cfg['setting_top_k'])
55
+ cfg['setting_top_p'] = gr.Number(label="Top P", interactive=True, **cfg['setting_top_p'])
56
+ cfg['setting_min_p'] = gr.Number(label="Min P", interactive=True, **cfg['setting_min_p'])
57
+ cfg['setting_typical_p'] = gr.Number(label="Typical", interactive=True, **cfg['setting_typical_p'])
58
+ cfg['setting_tfs_z'] = gr.Number(label="TFS", interactive=True, **cfg['setting_tfs_z'])
59
+ with gr.Row():
60
+ cfg['setting_mirostat_mode'] = gr.Number(label="Mirostat 模式", **cfg['setting_mirostat_mode'])
61
+ cfg['setting_mirostat_eta'] = gr.Number(label="Mirostat 学习率", interactive=True,
62
+ **cfg['setting_mirostat_eta'])
63
+ cfg['setting_mirostat_tau'] = gr.Number(label="Mirostat 目标熵", interactive=True,
64
+ **cfg['setting_mirostat_tau'])
65
+
66
+ # ========== 加载模型 ==========
67
+ cfg['model'] = StreamingLLM(model_path=cfg['setting_path'].value,
68
+ seed=cfg['setting_seed'].value,
69
+ n_gpu_layers=cfg['setting_n_gpu_layers'].value,
70
+ n_ctx=cfg['setting_ctx'].value)
71
+ cfg['chat_template'] = ChatTemplate(cfg['model'])
72
+ cfg['setting_ctx'].value = cfg['model'].n_ctx()
73
+
74
+ # ========== 展示角色卡 ==========
75
+ with gr.Blocks() as role:
76
+ with gr.Row():
77
+ cfg['role_usr'] = gr.Textbox(label="用户名称", max_lines=1, interactive=False, **cfg['role_usr'])
78
+ cfg['role_char'] = gr.Textbox(label="角色名称", max_lines=1, interactive=False, **cfg['role_char'])
79
+
80
+ cfg['role_char_d'] = gr.Textbox(lines=10, label="故事描述", **cfg['role_char_d'])
81
+ cfg['role_chat_style'] = gr.Textbox(lines=10, label="回复示例", **cfg['role_chat_style'])
82
+
83
+ # ========== 加载角色卡-缓存 ==========
84
+ from mods.load_cache import init as load_cache_init
85
+
86
+ text_display_init(cfg)
87
+ load_cache_init(cfg)
88
+
89
+ # ========== 聊天页面 ==========
90
+ with gr.Blocks() as chatting:
91
+ with gr.Row(equal_height=True):
92
+ cfg['chatbot'] = gr.Chatbot(height='60vh', scale=2, value=cfg['chatbot'],
93
+ avatar_images=(r'assets/user.png', r'assets/chatbot.webp'))
94
+ with gr.Column(scale=1, elem_id="area"):
95
+ cfg['rag'] = gr.Textbox(label='RAG', show_copy_button=True, elem_id="RAG-area")
96
+ cfg['vo'] = gr.Textbox(label='VO', show_copy_button=True, elem_id="VO-area")
97
+ cfg['s_info'] = gr.Textbox(value=cfg['model'].venv_info, max_lines=1, label='info', interactive=False)
98
+ cfg['msg'] = gr.Textbox(label='Prompt', lines=2, max_lines=2, elem_id='prompt', autofocus=True, **cfg['msg'])
99
+ with gr.Row():
100
+ cfg['btn_vo'] = gr.Button("旁白")
101
+ cfg['btn_rag'] = gr.Button("RAG")
102
+ cfg['btn_retry'] = gr.Button("Retry")
103
+ cfg['btn_com1'] = gr.Button("自定义1")
104
+ cfg['btn_reset'] = gr.Button("Reset")
105
+ cfg['btn_debug'] = gr.Button("Debug")
106
+ cfg['btn_submit'] = gr.Button("Submit")
107
+ cfg['btn_suggest'] = gr.Button("建议")
108
+
109
+ cfg['gr'] = gr
110
+ btn_com_init(cfg)
111
+
112
+ btn_rag_init(cfg)
113
+
114
+ btn_submit_init(cfg)
115
+
116
+ btn_vo_init(cfg)
117
+
118
+ btn_suggest_init(cfg)
119
+
120
+ # ========== 用于调试 ==========
121
+ btn_reset_init(cfg)
122
+
123
+ # ========== 让聊天界面的文本框等高 ==========
124
+ custom_css = r'''
125
+ #area > div {
126
+ height: 100%;
127
+ }
128
+ #RAG-area {
129
+ flex-grow: 1;
130
+ }
131
+ #RAG-area > label {
132
+ height: 100%;
133
+ display: flex;
134
+ flex-direction: column;
135
+ }
136
+ #RAG-area > label > textarea {
137
+ flex-grow: 1;
138
+ max-height: 20vh;
139
+ }
140
+ #VO-area {
141
+ flex-grow: 1;
142
+ }
143
+ #VO-area > label {
144
+ height: 100%;
145
+ display: flex;
146
+ flex-direction: column;
147
+ }
148
+ #VO-area > label > textarea {
149
+ flex-grow: 1;
150
+ max-height: 20vh;
151
+ }
152
+ #prompt > label > textarea {
153
+ max-height: 63px;
154
+ }
155
+ '''
156
+
157
+ # ========== 开始运行 ==========
158
+ demo = gr.TabbedInterface([chatting, setting, role],
159
+ ["聊天", "设置", '角色'],
160
+ css=custom_css)
161
+ gr.close_all()
162
+ demo.queue(api_open=False, max_size=1).launch(share=False)
mods/btn_com.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def init(cfg):
2
+ chat_template = cfg['chat_template']
3
+ model = cfg['model']
4
+ gr = cfg['gr']
5
+ lock = cfg['session_lock']
6
+
7
+ # ========== 流式输出函数 ==========
8
+ def btn_com(_n_keep, _n_discard,
9
+ _temperature, _repeat_penalty, _frequency_penalty,
10
+ _presence_penalty, _repeat_last_n, _top_k,
11
+ _top_p, _min_p, _typical_p,
12
+ _tfs_z, _mirostat_mode, _mirostat_eta,
13
+ _mirostat_tau, _role, _max_tokens):
14
+ # ========== 初始化输出模版 ==========
15
+ t_bot = chat_template(_role)
16
+ completion_tokens = [] # 有可能多个 tokens 才能构成一个 utf-8 编码的文字
17
+ history = ''
18
+ # ========== 流式输出 ==========
19
+ for token in model.generate_t(
20
+ tokens=t_bot,
21
+ n_keep=_n_keep,
22
+ n_discard=_n_discard,
23
+ im_start=chat_template.im_start_token,
24
+ top_k=_top_k,
25
+ top_p=_top_p,
26
+ min_p=_min_p,
27
+ typical_p=_typical_p,
28
+ temp=_temperature,
29
+ repeat_penalty=_repeat_penalty,
30
+ repeat_last_n=_repeat_last_n,
31
+ frequency_penalty=_frequency_penalty,
32
+ presence_penalty=_presence_penalty,
33
+ tfs_z=_tfs_z,
34
+ mirostat_mode=_mirostat_mode,
35
+ mirostat_tau=_mirostat_tau,
36
+ mirostat_eta=_mirostat_eta,
37
+ ):
38
+ if token in chat_template.eos or token == chat_template.nlnl:
39
+ t_bot.extend(completion_tokens)
40
+ print('token in eos', token)
41
+ break
42
+ completion_tokens.append(token)
43
+ all_text = model.str_detokenize(completion_tokens)
44
+ if not all_text:
45
+ continue
46
+ t_bot.extend(completion_tokens)
47
+ history += all_text
48
+ yield history
49
+ if token in chat_template.onenl:
50
+ # ========== 移除末尾的换行符 ==========
51
+ if t_bot[-2] in chat_template.onenl:
52
+ model.venv_pop_token()
53
+ break
54
+ if t_bot[-2] in chat_template.onerl and t_bot[-3] in chat_template.onenl:
55
+ model.venv_pop_token()
56
+ break
57
+ if history[-2:] == '\n\n': # 各种 'x\n\n' 的token,比如'。\n\n'
58
+ print('t_bot[-4:]', t_bot[-4:], repr(model.str_detokenize(t_bot[-4:])),
59
+ repr(model.str_detokenize(t_bot[-1:])))
60
+ break
61
+ if len(t_bot) > _max_tokens:
62
+ break
63
+ completion_tokens = []
64
+ # ========== 查看末尾的换行符 ==========
65
+ print('history', repr(history))
66
+ # ========== 给 kv_cache 加上输出结束符 ==========
67
+ model.eval_t(chat_template.im_end_nl, _n_keep, _n_discard)
68
+ t_bot.extend(chat_template.im_end_nl)
69
+
70
+ cfg['btn_com'] = btn_com
71
+
72
+ def btn_start_or_finish(finish):
73
+ tmp = gr.update(interactive=finish)
74
+
75
+ def _inner():
76
+ with lock:
77
+ if cfg['session_active'] != finish:
78
+ raise RuntimeError
79
+ cfg['session_active'] = not cfg['session_active']
80
+ return tmp, tmp, tmp
81
+
82
+ return _inner
83
+
84
+ btn_start_or_finish_outputs = [cfg['btn_submit'], cfg['btn_vo'], cfg['btn_suggest']]
85
+
86
+ cfg['btn_start'] = {
87
+ 'fn': btn_start_or_finish(False),
88
+ 'outputs': btn_start_or_finish_outputs
89
+ }
90
+
91
+ cfg['btn_finish'] = {
92
+ 'fn': btn_start_or_finish(True),
93
+ 'outputs': btn_start_or_finish_outputs
94
+ }
95
+
96
+ cfg['setting'] = [cfg[x] for x in ('setting_n_keep', 'setting_n_discard',
97
+ 'setting_temperature', 'setting_repeat_penalty', 'setting_frequency_penalty',
98
+ 'setting_presence_penalty', 'setting_repeat_last_n', 'setting_top_k',
99
+ 'setting_top_p', 'setting_min_p', 'setting_typical_p',
100
+ 'setting_tfs_z', 'setting_mirostat_mode', 'setting_mirostat_eta',
101
+ 'setting_mirostat_tau', 'role_usr', 'role_char',
102
+ 'rag', 'setting_max_tokens')]
mods/btn_rag.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def init(cfg):
    """Attach the click handler of the RAG button.

    Reads ``cfg['btn_rag']``, ``cfg['rag']`` and ``cfg['msg']``.
    """

    # ========== Not implemented yet ==========
    def btn_rag_(_rag, _msg):
        # Placeholder: both inputs are ignored and an empty string is
        # returned for the RAG textbox.
        return ''

    rag_box = cfg['rag']
    cfg['btn_rag'].click(
        fn=btn_rag_,
        inputs=[rag_box, cfg['msg']],
        outputs=rag_box,
    )
mods/btn_reset.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from huggingface_hub import HfApi
4
+
5
+ API = HfApi(token=os.environ.get("HF_TOKEN"))
6
+ REPO_ID = "Limour/llama-python-streamingllm"
7
+
8
+
9
def restart_space():
    """Ask the Hugging Face Hub to restart the Space named by ``REPO_ID``.

    The token is read from the ``HF_TOKEN`` environment variable at call time.
    """
    hf_token = os.environ.get("HF_TOKEN")
    API.restart_space(repo_id=REPO_ID, token=hf_token)
11
+
12
+
13
def init(cfg):
    """Wire the Reset and Debug buttons.

    Reads ``cfg['model']``, ``cfg['s_info']``, ``cfg['session_lock']``,
    ``cfg['setting_cache_path']``, ``cfg['btn_finish']``, ``cfg['btn_reset']``,
    ``cfg['btn_debug']`` and ``cfg['vo']``; writes ``cfg['session_active']``.
    """
    # ========== Shared ==========
    model = cfg['model']
    s_info = cfg['s_info']

    def btn_reset(_cache_path):
        """Reload the saved session from ``_cache_path``.

        Returns ``model.venv_info`` for the info textbox. If anything fails,
        the Space is restarted and the exception is re-raised.
        """
        try:
            with cfg['session_lock']:
                _tmp = model.load_session(_cache_path)
                print(f'load cache from {_cache_path} {_tmp}')
                # The chained cfg['btn_finish'] handler (built in
                # mods/btn_com.py) raises RuntimeError unless this flag is
                # True, then flips it back to False and re-enables the
                # buttons. Setting it to False here made that step fail after
                # every reset, so the buttons stayed disabled.
                cfg['session_active'] = True
                return model.venv_info
        except Exception:
            # Loading failed: restart the whole Space, then surface the
            # original error with its traceback intact.
            restart_space()
            raise

    cfg['btn_reset'].click(
        fn=btn_reset,
        inputs=cfg['setting_cache_path'],
        outputs=s_info
    ).success(
        **cfg['btn_finish']
    )

    # Debug: show the detokenized content of the model's current input ids
    # in the VO textbox.
    cfg['btn_debug'].click(
        fn=lambda: model.str_detokenize(model._input_ids),
        outputs=cfg['vo']
    )
mods/btn_submit.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def init(cfg):
2
+ # ========== 共同 ==========
3
+ model = cfg['model']
4
+ btn_com = cfg['btn_com']
5
+ s_info = cfg['s_info']
6
+ lock = cfg['session_lock']
7
+
8
+ # ========== 特殊 ==========
9
+ chat_template = cfg['chat_template']
10
+ msg = cfg['msg']
11
+ chatbot = cfg['chatbot']
12
+ chat_display_format = cfg['chat_display_format']
13
+
14
+ # ========== 显示用户消息 ==========
15
+ def btn_submit_usr(message: str, history):
16
+ # print('btn_submit_usr', message, history)
17
+ if history is None:
18
+ history = []
19
+ return "", history + [[message.strip(), '']]
20
+
21
+ # ========== 模型流式响应 ==========
22
+ def btn_submit_bot(history, _n_keep, _n_discard,
23
+ _temperature, _repeat_penalty, _frequency_penalty,
24
+ _presence_penalty, _repeat_last_n, _top_k,
25
+ _top_p, _min_p, _typical_p,
26
+ _tfs_z, _mirostat_mode, _mirostat_eta,
27
+ _mirostat_tau, _usr, _char,
28
+ _rag, _max_tokens):
29
+ with lock:
30
+ if not cfg['session_active']:
31
+ raise RuntimeError
32
+ # ========== 需要临时注入的内容 ==========
33
+ if len(_rag) > 0:
34
+ model.venv_create('rag') # 记录 venv_idx
35
+ t_rag = chat_template('system', _rag)
36
+ model.eval_t(t_rag, _n_keep, _n_discard)
37
+ # ========== 释放不再需要的环境 ==========
38
+ model.venv_disband({'usr', 'char'})
39
+ print('venv_disband char', model.venv_info)
40
+ # ========== 用户输入 ==========
41
+ model.venv_create('usr')
42
+ t_msg = history[-1][0]
43
+ t_msg = chat_template(_usr, t_msg)
44
+ model.eval_t(t_msg, _n_keep, _n_discard)
45
+ yield history, model.venv_info
46
+ # ========== 模型输出 ==========
47
+ model.venv_create('char')
48
+ _tmp = btn_com(_n_keep, _n_discard,
49
+ _temperature, _repeat_penalty, _frequency_penalty,
50
+ _presence_penalty, _repeat_last_n, _top_k,
51
+ _top_p, _min_p, _typical_p,
52
+ _tfs_z, _mirostat_mode, _mirostat_eta,
53
+ _mirostat_tau, _char, _max_tokens)
54
+ for _h in _tmp:
55
+ history[-1][1] = _h
56
+ yield history, model.venv_info
57
+ # ========== 输出完毕后格式化输出 ==========
58
+ history[-1][1] = chat_display_format(history[-1][1])
59
+ yield history, model.venv_info
60
+ # ========== 响应完毕后清除注入的内容 ==========
61
+ model.venv_remove('rag') # 销毁对应的 venv
62
+ yield history, model.venv_info
63
+
64
+ cfg['btn_submit'].click(
65
+ **cfg['btn_start']
66
+ ).success(
67
+ fn=btn_submit_usr, api_name="submit",
68
+ inputs=[msg, chatbot],
69
+ outputs=[msg, chatbot]
70
+ ).success(
71
+ fn=btn_submit_bot,
72
+ inputs=[chatbot]+cfg['setting'],
73
+ outputs=[chatbot, s_info]
74
+ ).success(
75
+ **cfg['btn_finish']
76
+ )
mods/btn_suggest.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def init(cfg):
2
+ # ========== 共同 ==========
3
+ model = cfg['model']
4
+ btn_com = cfg['btn_com']
5
+ s_info = cfg['s_info']
6
+ lock = cfg['session_lock']
7
+
8
+ # ========== 给用户提供默认回复的建议 ==========
9
+ def btn_suggest(_n_keep, _n_discard,
10
+ _temperature, _repeat_penalty, _frequency_penalty,
11
+ _presence_penalty, _repeat_last_n, _top_k,
12
+ _top_p, _min_p, _typical_p,
13
+ _tfs_z, _mirostat_mode, _mirostat_eta,
14
+ _mirostat_tau, _usr, _char,
15
+ _rag, _max_tokens):
16
+ with lock:
17
+ if not cfg['session_active']:
18
+ raise RuntimeError
19
+ # ========== 模型输出建议 ==========
20
+ model.venv_create('suggest') # 创建隔离环境
21
+ _tmp = btn_com(_n_keep, _n_discard,
22
+ _temperature, _repeat_penalty, _frequency_penalty,
23
+ _presence_penalty, _repeat_last_n, _top_k,
24
+ _top_p, _min_p, _typical_p,
25
+ _tfs_z, _mirostat_mode, _mirostat_eta,
26
+ _mirostat_tau, _usr, _max_tokens)
27
+ _h = ''
28
+ for _h in _tmp:
29
+ yield _h, model.venv_info
30
+ model.venv_remove('suggest') # 销毁隔离环境
31
+ yield _h, model.venv_info
32
+
33
+ cfg['btn_suggest'].click(
34
+ **cfg['btn_start']
35
+ ).success(
36
+ fn=btn_suggest,
37
+ inputs=cfg['setting'],
38
+ outputs=[cfg['msg'], s_info]
39
+ ).success(
40
+ **cfg['btn_finish']
41
+ )
mods/btn_vo.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def init(cfg):
2
+ # ========== 共同 ==========
3
+ model = cfg['model']
4
+ btn_com = cfg['btn_com']
5
+ s_info = cfg['s_info']
6
+ lock = cfg['session_lock']
7
+
8
+ # ========== 输出一段旁白 ==========
9
+ def btn_vo(_n_keep, _n_discard,
10
+ _temperature, _repeat_penalty, _frequency_penalty,
11
+ _presence_penalty, _repeat_last_n, _top_k,
12
+ _top_p, _min_p, _typical_p,
13
+ _tfs_z, _mirostat_mode, _mirostat_eta,
14
+ _mirostat_tau, _usr, _char,
15
+ _rag, _max_tokens):
16
+ with lock:
17
+ if not cfg['session_active']:
18
+ raise RuntimeError
19
+ # ========== 及时清理上一次生成的旁白 ==========
20
+ model.venv_remove('vo')
21
+ print('清理旁白', model.venv_info)
22
+ # ========== 模型输出旁白 ==========
23
+ model.venv_create('vo') # 创建隔离环境
24
+ _tmp = btn_com(_n_keep, _n_discard,
25
+ _temperature, _repeat_penalty, _frequency_penalty,
26
+ _presence_penalty, _repeat_last_n, _top_k,
27
+ _top_p, _min_p, _typical_p,
28
+ _tfs_z, _mirostat_mode, _mirostat_eta,
29
+ _mirostat_tau, '旁白', _max_tokens)
30
+ for _h in _tmp:
31
+ yield _h, model.venv_info
32
+
33
+ cfg['btn_vo'].click(
34
+ **cfg['btn_start']
35
+ ).success(
36
+ fn=btn_vo,
37
+ inputs=cfg['setting'],
38
+ outputs=[cfg['vo'], s_info]
39
+ ).success(
40
+ **cfg['btn_finish']
41
+ )
mods/load_cache.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
+ def init(cfg):
5
+ if os.path.exists(cfg['setting_cache_path'].value):
6
+ # ========== 加载角色卡-缓存 ==========
7
+ tmp = cfg['model'].load_session(cfg['setting_cache_path'].value)
8
+ print(f"load cache from {cfg['setting_cache_path'].value} {tmp}")
9
+ tmp = cfg['chat_template']('system',
10
+ cfg['text_format'](cfg['role_char_d'].value,
11
+ char=cfg['role_char'].value,
12
+ user=cfg['role_usr'].value))
13
+ cfg['setting_n_keep'].value = len(tmp)
14
+ tmp = cfg['chat_template'](cfg['role_char'].value,
15
+ cfg['text_format'](cfg['role_chat_style'].value,
16
+ char=cfg['role_char'].value,
17
+ user=cfg['role_usr'].value))
18
+ cfg['setting_n_keep'].value += len(tmp)
19
+ # ========== 加载角色卡-第一条消息 ==========
20
+ cfg['chatbot'] = []
21
+ for one in cfg["role_char_first"]:
22
+ one['name'] = cfg['text_format'](one['name'],
23
+ char=cfg['role_char'].value,
24
+ user=cfg['role_usr'].value)
25
+ one['value'] = cfg['text_format'](one['value'],
26
+ char=cfg['role_char'].value,
27
+ user=cfg['role_usr'].value)
28
+ if one['name'] == cfg['role_char'].value:
29
+ cfg['chatbot'].append((None, cfg['chat_display_format'](one['value'])))
30
+ print(one)
31
+ else:
32
+ # ========== 加载角色卡-角色描述 ==========
33
+ tmp = cfg['chat_template']('system',
34
+ cfg['text_format'](cfg['role_char_d'].value,
35
+ char=cfg['role_char'].value,
36
+ user=cfg['role_usr'].value))
37
+ cfg['setting_n_keep'].value = cfg['model'].eval_t(tmp) # 此内容永久存在
38
+
39
+ # ========== 加载角色卡-回复示例 ==========
40
+ tmp = cfg['chat_template'](cfg['role_char'].value,
41
+ cfg['text_format'](cfg['role_chat_style'].value,
42
+ char=cfg['role_char'].value,
43
+ user=cfg['role_usr'].value))
44
+ cfg['setting_n_keep'].value += cfg['model'].eval_t(tmp) # 此内容永久存在
45
+
46
+ # ========== 加载角色卡-第一条消息 ==========
47
+ cfg['chatbot'] = []
48
+ for one in cfg["role_char_first"]:
49
+ one['name'] = cfg['text_format'](one['name'],
50
+ char=cfg['role_char'].value,
51
+ user=cfg['role_usr'].value)
52
+ one['value'] = cfg['text_format'](one['value'],
53
+ char=cfg['role_char'].value,
54
+ user=cfg['role_usr'].value)
55
+ if one['name'] == cfg['role_char'].value:
56
+ cfg['chatbot'].append((None, cfg['chat_display_format'](one['value'])))
57
+ print(one)
58
+ tmp = cfg['chat_template'](one['name'], one['value'])
59
+ cfg['model'].eval_t(tmp) # 此内容随上下文增加将被丢弃
60
+
61
+ # ========== 保存角色卡-缓存 ==========
62
+ with open(cfg['setting_cache_path'].value, 'wb') as f:
63
+ pass
64
+ tmp = cfg['model'].save_session(cfg['setting_cache_path'].value)
65
+ print(f'save cache {tmp}')
66
+ # ========== 上传缓存 ==========
67
+ if os.environ.get("HF_TOKEN"):
68
+ from huggingface_hub import login, CommitScheduler
69
+ login(token=os.environ.get("HF_TOKEN"), write_permission=True)
70
+ CommitScheduler(repo_id='Limour/llama-python-streamingllm-cache', repo_type='dataset', folder_path='cache')
mods/read_cfg.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ import json
3
+
4
+
5
+ # ========== 哈希函数 ==========
6
def x_hash(x: str):
    """Return the hexadecimal SHA-1 digest of ``x`` encoded as UTF-8."""
    digest = hashlib.sha1()
    digest.update(x.encode('utf-8'))
    return digest.hexdigest()
8
+
9
+
10
+ # ========== 读取配置文件 ==========
11
# Raw text of the user's config; kept as text because its hash is needed below.
with open('rp_config.json', encoding='utf-8') as f:
    tmp = f.read()

# Defaults come from the sample config.
with open('rp_sample_config.json', encoding='utf-8') as f:
    cfg = json.loads(f.read())

# Append the hash of the user config text to the default cache path, then let
# the user's top-level keys override the defaults.
cfg['setting_cache_path']['value'] += x_hash(tmp)
_overrides = json.loads(tmp)
cfg.update(_overrides)
mods/text_display.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
+ # ========== 适配 SillyTavern 的模版 ==========
5
def text_format(text: str, _env=None, **env):
    """Substitute SillyTavern-style ``{{key}}`` placeholders in ``text``.

    Args:
        text: Template string.
        _env: Optional mapping of placeholder names to replacement values;
            applied before ``env``.
        **env: Further placeholder names and values as keyword arguments.

    Returns:
        ``text`` with every ``{{key}}`` occurrence replaced. Placeholders
        whose key appears in neither mapping are left untouched.
    """
    # A single loop serves both mappings; _env goes first so the original
    # substitution order is preserved.
    for mapping in ({} if _env is None else _env, env):
        for key, value in mapping.items():
            # str() lets non-string values (e.g. numbers) be substituted;
            # str.replace would otherwise raise TypeError.
            text = text.replace('{{' + key + '}}', str(value))
    return text
12
+
13
+
14
+ # ========== 给引号加粗 ==========
15
reg_q = re.compile(r'“(.+?)”')


def chat_display_format(text: str):
    """Wrap every “…” quotation in ``text`` in Markdown bold markers.

    Each match is replaced by the match itself, preceded by a space and
    ``**`` and followed by ``**`` and a space.
    """
    def _embolden(match):
        return ' **' + match.group(0) + '** '

    return reg_q.sub(_embolden, text)
20
+
21
+
22
def init(cfg):
    """Register the two text helpers of this module in ``cfg``."""
    cfg.update(
        text_format=text_format,
        chat_display_format=chat_display_format,
    )