Qingnan Duan committed
Commit: f786a98
Parent(s): 220f772

Normalize response with locale hint

Files changed: modeling_chatglm.py (+61 -6)

modeling_chatglm.py CHANGED
@@ -46,6 +46,17 @@ CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LIST = [
     # See all ChatGLM-6B models at https://huggingface.co/models?filter=chatglm
 ]
 
+QUERY_KEYWORDS = {
+    'chinese-simplified': {
+        'question': '问:',
+        'answer': '答:',
+    },
+    'english': {
+        'question': 'Q:',
+        'answer': 'A:',
+    }
+}
+
 
 class InvalidScoreLogitsProcessor(LogitsProcessor):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
@@ -1087,9 +1098,19 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             for layer_past in past
         )
 
+    def chat(self, *args, **kwargs):
+        return self.chat_chinese_simplified(*args, **kwargs)
+
+    def chat_chinese_simplified(self, *args, **kwargs):
+        return self.chat_internal(*args, **kwargs, locale='chinese-simplified')
+
+    def chat_english(self, *args, **kwargs):
+        return self.chat_internal(*args, **kwargs, locale='english')
+
     @torch.no_grad()
-    def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, max_length: int = 2048, num_beams=1,
-             do_sample=True, top_p=0.7, temperature=0.95, logits_processor=None, **kwargs):
+    def chat_internal(self, tokenizer, query: str, locale: str,
+                      history: List[Tuple[str, str]] = None, max_length: int = 2048, num_beams=1,
+                      do_sample=True, top_p=0.7, temperature=0.95, logits_processor=None, **kwargs):
         if history is None:
             history = []
         if logits_processor is None:
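Usage sketch (not part of this commit): with the change applied, the locale-specific entry points are called like the upstream chat API. The loading path below assumes the usual THUDM/chatglm-6b setup with trust_remote_code.

from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()

# chat() keeps its old behavior by delegating to the Simplified Chinese path
response, history = model.chat(tokenizer, "你好", history=[])
# chat_english() builds a Q:/A: prompt and normalizes to half-width punctuation
response, history = model.chat_english(tokenizer, "Hello", history=[])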
@@ -1097,20 +1118,21 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         logits_processor.append(InvalidScoreLogitsProcessor())
         gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                       "temperature": temperature, "logits_processor": logits_processor, **kwargs}
+        format_query_keyword_question = QUERY_KEYWORDS[locale]['question']
+        format_query_keyword_answer = QUERY_KEYWORDS[locale]['answer']
         if not history:
             prompt = query
         else:
             prompt = ""
             for i, (old_query, response) in enumerate(history):
-                prompt += "[Round {}]\n问:{}\n答:{}\n".format(i, old_query, response)
-            prompt += "[Round {}]\n问:{}\n答:".format(len(history), query)
+                prompt += f"[Round {i}]\n{format_query_keyword_question}{old_query}\n{format_query_keyword_answer}{response}\n"
+            prompt += f"[Round {len(history)}]\n{format_query_keyword_question}{query}\n{format_query_keyword_answer}"
         input_ids = tokenizer([prompt], return_tensors="pt", padding=True)
         input_ids = input_ids.to(self.device)
         outputs = self.generate(**input_ids, **gen_kwargs)
         outputs = outputs.tolist()[0][len(input_ids["input_ids"][0]) - 2:]
         response = tokenizer.decode(outputs)
-        response = response.strip()
-        response = response.replace("[[训练时间]]", "2023年")
+        response = self.post_process(response, locale=locale)
         history = history + [(query, response)]
         return response, history
 
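Illustration (hypothetical values, not part of this commit): for locale='english' with a one-turn history, chat_internal now builds the prompt as follows, where the old code hardcoded the Chinese 问:/答: keywords.

history = [("Hi", "Hello!")]
query = "How are you?"
# -> "[Round 0]\nQ:Hi\nA:Hello!\n[Round 1]\nQ:How are you?\nA:"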
@@ -1165,6 +1187,39 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
 
         return torch.tensor(return_seqs, dtype=torch.long, device=kwargs['input_ids'].device)
 
+    def post_process(self, response: str, locale: str) -> str:
+        response = response.strip()
+        response = response.replace("[[训练时间]]", "2023年")
+
+        if locale == 'chinese-simplified':
+            import re
+            # CJK Unified Ideographs + CJK Unified Ideographs Extension A
+            cjk_regex = r'([\u4e00-\u9fff]|[\u3400-\u4dbf])'
+            regex_mapping = {
+                cjk_regex + ',': r'\1,',
+                cjk_regex + r'\.': r'\1。',
+                cjk_regex + r'\?': r'\1?',
+                cjk_regex + '!': r'\1!',
+                cjk_regex + ':': r'\1:',
+                cjk_regex + ';': r'\1;',
+            }
+            for pattern in regex_mapping:
+                response = re.sub(pattern, regex_mapping[pattern], response)
+            # Nested parentheses not supported.
+            response = re.sub(r'\(([^\(\)]*(?:[\u4e00-\u9fff]|[\u3400-\u4dbf])[^\(\)]*)\)', r'(\1)', response)
+        elif locale == 'english':
+            mapping = {
+                ',': ',',
+                '。': '.',
+                '?': '?',
+                '!': '!',
+                ':': ':',
+                ';': ';',
+            }
+            for char in mapping:
+                response = response.replace(char, mapping[char])
+        return response
+
     def quantize(self, bits: int):
         from .quantization import quantize
         self.transformer = quantize(self.transformer, bits)
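Behavior sketch (hypothetical strings, not part of this commit): the two post_process paths normalize punctuation in opposite directions.

# chinese-simplified: half-width punctuation directly after a CJK character
# becomes full-width; parenthesized runs containing CJK get full-width parens.
#   "你好,世界." -> "你好,世界。"
# english: full-width punctuation is replaced with half-width everywhere.
#   "Hello,world。" -> "Hello,world."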