Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files
config/btn_submit_vo_suggest.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"btn_submit_vo_suggest_combine": [
|
3 |
+
"btn_submit_fn_usr",
|
4 |
+
"btn_rag_fn",
|
5 |
+
"btn_submit_fn_bot",
|
6 |
+
"btn_vo_fn",
|
7 |
+
"btn_suggest_fn"
|
8 |
+
]
|
9 |
+
}
|
config/rp_sample_config.json
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"setting_cache_path": {
|
3 |
+
"value": "cache/"
|
4 |
+
},
|
5 |
+
"setting_seed": {
|
6 |
+
"value": 4294967295
|
7 |
+
},
|
8 |
+
"setting_n_gpu_layers": {
|
9 |
+
"value": 20
|
10 |
+
},
|
11 |
+
"setting_max_tokens": {
|
12 |
+
"value": 1024,
|
13 |
+
"minimum": 1
|
14 |
+
},
|
15 |
+
"setting_n_discard": {
|
16 |
+
"value": 256,
|
17 |
+
"minimum": 1,
|
18 |
+
"maximum": 4095
|
19 |
+
},
|
20 |
+
"setting_temperature": {
|
21 |
+
"value": 0.7,
|
22 |
+
"step": 0.1,
|
23 |
+
"info": "温度设置:通过缩放概率而不删除选项来控制输出的「随机」程度。较低的值更合乎逻辑,但创意较差。可以根据需要随时调整。"
|
24 |
+
},
|
25 |
+
"setting_repeat_penalty": {
|
26 |
+
"value": 1.1,
|
27 |
+
"minimum": 0,
|
28 |
+
"maximum": 2,
|
29 |
+
"step": 0.1,
|
30 |
+
"info": "重复惩罚:应用惩罚以减少最近已经使用的单词的重复,使 AI 的输出减少重复。"
|
31 |
+
},
|
32 |
+
"setting_frequency_penalty": {
|
33 |
+
"value": 0,
|
34 |
+
"minimum": 0,
|
35 |
+
"maximum": 2,
|
36 |
+
"step": 0.1,
|
37 |
+
"info": "频率惩罚:应用惩罚以减少最近已经使用的单词的重复,使 AI 的输出减少重复。将值设置为 0 以禁用其效果。"
|
38 |
+
},
|
39 |
+
"setting_presence_penalty": {
|
40 |
+
"value": 0,
|
41 |
+
"minimum": 0,
|
42 |
+
"maximum": 2,
|
43 |
+
"step": 0.1,
|
44 |
+
"info": "存在惩罚:应用惩罚以减少指定范围内已经使用的单词的重复,使 AI 的输出减少重复。将值设置为 0 以禁用其效果。"
|
45 |
+
},
|
46 |
+
"setting_repeat_last_n": {
|
47 |
+
"value": 1200,
|
48 |
+
"minimum": 0,
|
49 |
+
"maximum": 4095,
|
50 |
+
"info": "惩罚时会考虑的最近的 tokens 数量"
|
51 |
+
},
|
52 |
+
"setting_top_k": {
|
53 |
+
"value": 40,
|
54 |
+
"minimum": 0,
|
55 |
+
"info": "此设置将可供选择的可能字词数量限制为前 K 个最有可能的选项,并删除其他所有内容。可与 Top-P 一起使用。将值设置为 0 以禁用其效果。"
|
56 |
+
},
|
57 |
+
"setting_top_p": {
|
58 |
+
"value": 0.92,
|
59 |
+
"minimum": 0,
|
60 |
+
"maximum": 1,
|
61 |
+
"step": 0.1,
|
62 |
+
"info": "在采样过程中丢弃不太可能的文本。仅考虑累积概率总和为 P 的单词。低值使文本可预测,因为删除了不常见的标记。将值设置为 1 以禁用其效果。"
|
63 |
+
},
|
64 |
+
"setting_min_p": {
|
65 |
+
"value": 0.05,
|
66 |
+
"minimum": 0,
|
67 |
+
"maximum": 1,
|
68 |
+
"step": 0.01,
|
69 |
+
"info": "作为 Top-P 的替代项,相对于最可能的令牌的概率,令牌被考虑的最小概率。例如,当p=0.05且最可能的令牌的概率为0.9时,数值小于0.045的logits将被过滤掉。"
|
70 |
+
},
|
71 |
+
"setting_typical_p": {
|
72 |
+
"value": 1,
|
73 |
+
"minimum": 0,
|
74 |
+
"maximum": 1,
|
75 |
+
"step": 0.01,
|
76 |
+
"info": "典型采样:从可能的单词清单中随机选择单词,每个单词都有相等的机会被选中。此方法可以生成更多样化的文本,但也可能不太连贯。将值设置为 1 以禁用其效果。"
|
77 |
+
},
|
78 |
+
"setting_tfs_z": {
|
79 |
+
"value": 1,
|
80 |
+
"minimum": 0,
|
81 |
+
"maximum": 1,
|
82 |
+
"step": 0.01,
|
83 |
+
"info": "无尾采样:作为 Top-P 的替代项,此设置在文本生成过程中从考虑范围中删除最不可能的单词,考虑二阶导数。可以提高生成的文本的质量和连贯性。将值设置为 1 以禁用其效果。"
|
84 |
+
},
|
85 |
+
"setting_mirostat_mode": {
|
86 |
+
"value": 0,
|
87 |
+
"minimum": 0,
|
88 |
+
"maximum": 2,
|
89 |
+
"info": "覆盖其他采样器的替代采样方法。将值设置为 0 以禁用其效果。"
|
90 |
+
},
|
91 |
+
"setting_mirostat_eta": {
|
92 |
+
"value": 0.1,
|
93 |
+
"minimum": 0,
|
94 |
+
"step": 0.01,
|
95 |
+
"info": "学习率影响算法对生成文本的反馈作出反应的速度。较低的学习率会导致调整较慢,而较高的学习率会使算法更具响应性。"
|
96 |
+
},
|
97 |
+
"setting_mirostat_tau": {
|
98 |
+
"value": 5,
|
99 |
+
"minimum": 0,
|
100 |
+
"step": 0.1,
|
101 |
+
"info": "调整目标熵可以控制生成文本中连贯性和多样性之间的平衡。较低的数值会导致更加集中和连贯的文本,而较高的数值则会导致更多样化但潜在地更不连贯的文本。"
|
102 |
+
},
|
103 |
+
"msg": {
|
104 |
+
"value": "我亲爱的妹妹,早上好"
|
105 |
+
}
|
106 |
+
}
|
llama_cpp_python_streamingllm.py
CHANGED
@@ -57,13 +57,15 @@ class StreamingLLM(Llama):
|
|
57 |
self.kv_cache_seq_trim()
|
58 |
return True
|
59 |
|
60 |
-
def venv_remove(self, name: str):
|
61 |
if len(self.venv) <= 1:
|
62 |
return False
|
63 |
if name not in self.venv_idx_map:
|
64 |
return False
|
65 |
venv_idx = self.venv_idx_map.index(name) + 1
|
66 |
while self.venv_idx_map:
|
|
|
|
|
67 |
self.venv_idx_map.pop(venv_idx - 1) # 删除
|
68 |
if venv_idx == len(self.venv) - 1:
|
69 |
# 最后一层
|
|
|
57 |
self.kv_cache_seq_trim()
|
58 |
return True
|
59 |
|
60 |
+
def venv_remove(self, name: str, keep_last=False):
|
61 |
if len(self.venv) <= 1:
|
62 |
return False
|
63 |
if name not in self.venv_idx_map:
|
64 |
return False
|
65 |
venv_idx = self.venv_idx_map.index(name) + 1
|
66 |
while self.venv_idx_map:
|
67 |
+
if keep_last and self.venv_idx_map.count(name) <= 1:
|
68 |
+
break # 保留最后一个
|
69 |
self.venv_idx_map.pop(venv_idx - 1) # 删除
|
70 |
if venv_idx == len(self.venv) - 1:
|
71 |
# 最后一层
|