ChenyuRabbitLove committed
Commit 90a4f47 • 1 Parent(s): e4c798e

feat: add summarizer map-reduce

Files changed (1)
  1. utils/gpt_processor.py +4 -4
utils/gpt_processor.py CHANGED

@@ -24,7 +24,7 @@ class GPTAgent:
         response = self.agent.complete(messages=messages)
         return response.choices[0].message["content"]

-    def split_into_many(text):
+    def split_into_many(self, text):
         tokenizer = tiktoken.get_encoding("cl100k_base")

         sentences = text.split("。")
@@ -36,12 +36,12 @@ class GPTAgent:

         for sentence, token in zip(sentences, n_tokens):

-            if tokens_so_far + token > 500:
+            if tokens_so_far + token > 3000:
                 chunks.append("。".join(chunk) + "。")
                 chunk = []
                 tokens_so_far = 0

-            if token > 500:
+            if token > 3000:
                 continue
             chunk.append(sentence)
             tokens_so_far += token + 1
@@ -345,4 +345,4 @@ class QuestionAnswerer(GPTAgent):
            if history[i][1] is not None:
                messages.append({"role": "assistant", "content": history[i][1]})

-        return messages
+        return messages
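The hunks above only show the changed lines of split_into_many. For readers who want to run the chunking step of the map-reduce summarizer on its own, here is a minimal standalone sketch: the loop body, the "。" sentence delimiter, the cl100k_base tokenizer, and the 3000-token budget come straight from the diff, while the n_tokens computation and the flush of the trailing chunk are assumptions, since those lines are outside the visible context.

import tiktoken

def split_into_many(text):
    """Standalone sketch of the chunking step shown in the diff above.

    The real method lives on GPTAgent (hence the new `self` parameter in the
    commit); sentences are delimited by the Chinese full stop "。" and token
    counts use the cl100k_base encoding. The n_tokens computation and the
    flush of the trailing chunk are assumptions not visible in the diff.
    """
    tokenizer = tiktoken.get_encoding("cl100k_base")

    sentences = text.split("。")
    # Assumed: per-sentence token counts feeding the zip() in the diff.
    n_tokens = [len(tokenizer.encode(" " + sentence)) for sentence in sentences]

    chunks = []
    chunk = []
    tokens_so_far = 0

    for sentence, token in zip(sentences, n_tokens):
        # Close the current chunk once adding this sentence would exceed the
        # per-chunk budget (raised from 500 to 3000 tokens in this commit).
        if tokens_so_far + token > 3000:
            chunks.append("。".join(chunk) + "。")
            chunk = []
            tokens_so_far = 0

        # Skip any single sentence that is itself longer than the budget.
        if token > 3000:
            continue

        chunk.append(sentence)
        tokens_so_far += token + 1

    # Assumed: flush whatever is left so the tail of the text is not dropped.
    if chunk:
        chunks.append("。".join(chunk) + "。")

    return chunks

Each chunk can then be summarized independently (the map step) and the partial summaries combined and summarized once more (the reduce step), which is presumably what the commit title "summarizer map-reduce" refers to.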
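The final hunk only touches the tail of the QuestionAnswerer prompt builder. Based solely on the two visible context lines, a hypothetical sketch of how (user, assistant) history pairs are folded into a chat messages list might look as follows; the history and messages names come from the diff, while the system prompt and the user-turn handling are assumptions.

def build_messages(history, system_prompt):
    """Hypothetical reconstruction of the prompt builder's shape; only the
    assistant-turn append and the final return are visible in the diff,
    everything else here is assumed."""
    messages = [{"role": "system", "content": system_prompt}]
    for i in range(len(history)):
        # Assumed: each history entry is a (user, assistant) pair.
        messages.append({"role": "user", "content": history[i][0]})
        # From the diff: only append the assistant turn when a reply exists.
        if history[i][1] is not None:
            messages.append({"role": "assistant", "content": history[i][1]})
    return messages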