# File size: 7,535 Bytes
# d573b56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import asyncio
import logging
from datetime import datetime
import json
import re
import random
random.seed(0)
from tqdm import tqdm
import requests
from loguru import logger


from data_utils import response, save_json, save_json_once


def generate_answers(questions="黑神话悟空什么时候发布的",  english=False):
    """Ask the local HuixiangDou inference service one question and return its answer.

    The service has to be started beforehand; see
    https://github.com/InternLM/HuixiangDou/blob/main/README_zh.md

    Args:
        questions: Question text, sent as the "text" field of the JSON payload.
        english: Not used by this function; kept so existing callers keep working.

    Returns:
        The "response" of the last entry in the reply's "step" list whose
        "state" is "success". Returns "" when the HTTP status is not 200, the
        body is not valid JSON, or no successful step is present.

    Raises:
        Whatever ``requests.post`` raises when the service cannot be reached;
        connection failures are deliberately not swallowed.
    """
    # Endpoint of the locally running service.
    url = 'http://127.0.0.1:23333/huixiangdou_inference'

    # Request body; "image" is always sent empty here.
    data = {
        "text": f"{questions}",
        "image": ""
    }
    payload = json.dumps(data)
    headers = {
        'Content-Type': 'application/json'
    }

    rag_response = requests.post(url, data=payload, headers=headers)
    res_txt = rag_response.text

    # A failed request has no usable body: report it and hand back an empty
    # answer instead of trying to parse it.
    if rag_response.status_code != 200:
        logger.error(f"请求失败,状态码:{rag_response.status_code}")
        return ""

    try:
        # strict=False tolerates control characters inside JSON strings.
        res = json.loads(res_txt, strict=False)
    except ValueError:
        logger.error(f'invalid JSON response: {res_txt}')
        return ""

    steps = res.get("step") if isinstance(res, dict) else None
    if not isinstance(steps, list):
        steps = []

    answer = ""
    for step in steps:
        # Later successful steps overwrite earlier ones, so the last one wins.
        if isinstance(step, dict) and step.get("state") == "success":
            answer = step.get("response") or ""
            logger.info(f"got response: {answer}")
    if answer == "":
        logger.error(f'no answer: {res_txt}')
    return answer


def _parse_question_list(text_res):
    """Turn the model's reply into a list of question strings.

    The whole reply is tried first; if that is not a list literal, the text
    between the first '[' and the next ']' is tried instead.

    Raises:
        ValueError: if no list literal can be recovered from the reply.
    """
    # Function-scope import: `ast` is not among the file-level imports.
    import ast

    if not isinstance(text_res, str):
        logger.error(f"Got non-text reply, text_res:\n{text_res}")
        raise ValueError("text res must be list")

    candidates = [text_res]
    start_index = text_res.find('[')
    end_index = text_res.find(']', start_index + 1) if start_index != -1 else -1
    if end_index != -1:
        candidates.append(text_res[start_index:end_index + 1])

    last_error = None
    for candidate in candidates:
        try:
            # NOTE(security): the reply is untrusted model output. This used to
            # go through eval(); literal_eval only accepts Python literals, so
            # the reply can no longer execute code.
            parsed = ast.literal_eval(candidate.strip())
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
            last_error = e
            continue
        if isinstance(parsed, (list, tuple)):
            return [str(item) for item in parsed]
        last_error = TypeError(f"expected a list, got {type(parsed).__name__}")

    logger.error(f"Got exception {last_error}, text_res:\n{text_res}")
    raise ValueError("text res must be list")


def _generate_single_turn(aspect, question_number, simple_response):
    """Ask for several questions on one aspect and save each answered one as its own record."""
    messages = [
        {"role": "system", "content": """你是一名提问助手,专注于围绕游戏《黑神话:悟空》的特定方面(例如主要角色、战斗机制或故事情节)提出问题,20个字以内。
                                                根据用户的指示,调整你的提问内容,引导玩家深入思考该方面的背景、动机及其在游戏中的作用和发展。不需要序号,最终返回为一个list,格式为['问题', '问题', ...]"""},
        {"role": "user", "content": f"请你就《黑神话:悟空》的‘{aspect}’方面提出{question_number}个不同的问题,帮助玩家了解游戏在该方面的信息。"},
    ]
    text_res = response(messages, temperature=0.7)
    questions = _parse_question_list(text_res)
    logger.info(f"questions: {questions}")

    for question in tqdm(questions, total=len(questions), desc="generating answer"):
        if simple_response:
            question += "。请简要回答。"
        answer = generate_answers(questions=question, english=False)

        # Very short answers are treated as failures and not saved.
        if len(answer) > 10:
            conversation = {
                "conversation": [
                    {
                        "system": base_system_propmt,  # not used
                        "input": question,
                        "output": answer,
                    }
                ]
            }
            save_json_once(conversation, save_path)


def _generate_multi_turn(aspect, simple_response):
    """Build one multi-turn conversation on one aspect and save it if any turn was answered."""
    question_number = 1
    iters = random.randint(2, 4)
    conversation = {"conversation": []}
    question, answer = "", ""
    for i in range(iters):
        messages = [
            {"role": "system", "content": """你现在是一个专门的提问助手,目标是帮助玩家深入探索《黑神话:悟空》这款游戏。你的任务是基于玩家的回答,提出越来越细致和深入的问题。你的提问风格应当简洁、清晰,20个字以内。"""},
            {"role": "user", "content": f"请你就《黑神话:悟空》的‘{aspect}’方面提出{question_number}个问题,帮助玩家了解游戏在该方面的信息。"},
        ]
        if i > 0:
            # Only the previous question/answer pair is fed back, with the
            # roles swapped: the model asked, the "user" answered.
            messages += [
                {"role": "assistant", "content": f"{question}"},
                {"role": "user", "content": f"{answer}"},
            ]
        question = response(messages, temperature=0.7)
        if "黑神话" not in question:
            question = "在《黑神话:悟空》中," + question
        logger.info(f"multi turn, questions:\n{question}")
        if simple_response:
            question += "。请简要回答。"
        answer = generate_answers(questions=question, english=False)
        logger.info(f"multi turn, answer:\n{answer}")

        # Very short answers are treated as failures and not recorded.
        if len(answer) > 10:
            if conversation["conversation"]:
                turn = {"input": question, "output": answer}
            else:
                # Only the first recorded turn carries the system prompt.
                turn = {
                    "system": base_system_propmt,  # not used
                    "input": question,
                    "output": answer,
                }
            conversation["conversation"].append(turn)

    if conversation["conversation"]:
        save_json_once(conversation, save_path)


def generate_rag_data(multi=False, simple_response=True):
    """Generate question/answer training records about the game, one aspect at a time.

    For every aspect in a fixed list, questions are obtained through
    ``response`` and answered through ``generate_answers``; records with an
    answer longer than 10 characters are written with ``save_json_once``.

    This function reads the module-level names ``save_path`` and
    ``base_system_propmt``, which this file assigns only inside its
    ``if __name__ == '__main__'`` block.

    Args:
        multi: If true, build one multi-turn conversation (2 to 4 turns) per
            aspect; otherwise request 10 questions per aspect and save each
            as a single-turn record.
        simple_response: If true, append "。请简要回答。" to every question
            before it is answered.

    Raises:
        ValueError: in single-turn mode, when the reply cannot be parsed as a
            list of questions.
    """
    aspects = [ "游戏概述", "章节与故事情节", "主要角色", "人物剧情梳理", "游戏世界观", "建筑与环境", "战斗系统", "游戏玩法", "艺术与音乐", "文化内涵", "市场影响", "彩蛋、网络梗" ] + [ "发售相关", "游戏背后的中国文化", "角色故事", "游戏攻略", "棍法类型", "天命人法术类型" ]
    question_number = 10
    for aspect in tqdm(aspects, total=len(aspects), desc="each aspects"):
        if not multi:
            # Several questions at once, each saved as a single-turn record.
            _generate_single_turn(aspect, question_number, simple_response)
        else:
            # One multi-turn conversation for this aspect.
            _generate_multi_turn(aspect, simple_response)




if __name__ == '__main__':
    # Module-level settings that generate_rag_data reads as globals:
    # the output file and the system prompt stored in each record.
    save_path = "./huixiangdou_conversations.jsonl"
    base_system_propmt = "你是悟了悟了,由xzyun2011开发的AI助手,专注于回答和《黑神话:悟空》这款游戏相关的问题,你想帮助玩家了解更多这款游戏背后的故事和文化知识。"

    # Single pass; the pass index seeds the RNG, and the first draw decides
    # between multi-turn (> 0.5) and single-turn generation.
    for run_seed in range(1):
        random.seed(run_seed)
        use_multi_turn = random.random() > 0.5
        generate_rag_data(multi=use_multi_turn, simple_response=True)