# Spaces: xzyun2011/wulewule (Running) | File size: 7,535 Bytes | d573b56

import asyncio
import logging
from datetime import datetime
import json
import re
import random
random.seed(0)
from tqdm import tqdm
import requests
from loguru import logger


from data_utils import response, save_json, save_json_once


def generate_answers(questions="黑神话悟空什么时候发布的",  english=False):
    """Send one question to the local HuixiangDou inference service and return its answer.

    The service must be started beforehand, see
    https://github.com/InternLM/HuixiangDou/blob/main/README_zh.md

    Args:
        questions: Question text; sent as the ``text`` field of the JSON payload.
        english: Not used by this function; kept so existing callers keep working.

    Returns:
        The ``response`` of the last step whose ``state`` is ``"success"``.
        An empty string is returned when the HTTP status is not 200, when the
        body is not a JSON object with a ``step`` list, or when no step
        succeeded. Callers already discard short answers, so an empty string
        simply skips this question instead of aborting the whole run.
    """
    url = 'http://127.0.0.1:23333/huixiangdou_inference'
    payload = json.dumps({
        "text": f"{questions}",
        "image": ""
    })
    headers = {
        'Content-Type': 'application/json'
    }

    rag_response = requests.post(url, data=payload, headers=headers)
    if rag_response.status_code != 200:
        logger.error(f"请求失败，状态码：{rag_response.status_code}")
        # An error body is not expected to carry a usable "step" list; stop
        # here rather than failing later while parsing it.
        return ""

    res_txt = rag_response.text
    try:
        # strict=False tolerates raw control characters inside JSON strings.
        res = json.loads(res_txt, strict=False)
    except json.JSONDecodeError as e:
        logger.error(f"invalid JSON response ({e}): {res_txt}")
        return ""

    steps = res.get("step") if isinstance(res, dict) else None
    if not isinstance(steps, list):
        logger.error(f'no answer: {res_txt}')
        return ""

    answer = ""
    for step in steps:
        if isinstance(step, dict) and step.get("state") == "success":
            # Later successful steps overwrite earlier ones: the last one wins.
            answer = step.get("response", "")
            logger.info(f"got response: {answer}")
    if answer == "":
        logger.error(f'no answer: {res_txt}')
    return answer


def _parse_question_list(text_res):
    """Parse the model reply into a list of question strings.

    The whole reply is tried first; if that fails, the text between the first
    '[' and the last ']' is tried, then the text between the first '[' and the
    first ']'. Only Python literals are accepted (no code is executed).

    Raises:
        ValueError: if no candidate parses to a list/tuple.
    """
    import ast  # local import so this block does not depend on the file header

    if not isinstance(text_res, str):
        raise ValueError("text res must be list")

    candidates = [text_res]
    start_index = text_res.find('[')
    if start_index != -1:
        for end_index in (text_res.rfind(']'), text_res.find(']', start_index)):
            if end_index > start_index:
                candidates.append(text_res[start_index:end_index + 1])

    for candidate in candidates:
        try:
            parsed = ast.literal_eval(candidate.strip())
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, (list, tuple)):
            return [str(item) for item in parsed]
    raise ValueError("text res must be list")


def generate_rag_data(multi=False, simple_response=True):
    """Generate question/answer conversations about the game and append them to a JSONL file.

    For every aspect in a fixed list, questions are produced by ``response``
    (the LLM helper imported from ``data_utils``) and answered by
    ``generate_answers``. Answers of 10 characters or fewer are discarded.

    Args:
        multi: If False, ask for 10 independent questions per aspect and save
            each one as its own single-turn conversation. If True, build one
            multi-turn conversation of 2-4 rounds per aspect, feeding the
            previous question and answer back to the question generator.
        simple_response: If True, "。请简要回答。" is appended to every question
            to request a brief answer.

    Raises:
        ValueError: in single-turn mode, when the model reply cannot be parsed
            as a list of questions.

    Note:
        ``save_path`` and ``base_system_propmt`` are read from module globals;
        in this file they are assigned in the ``__main__`` block.
    """
    aspects = [
        "游戏概述", "章节与故事情节", "主要角色", "人物剧情梳理", "游戏世界观", "建筑与环境",
        "战斗系统", "游戏玩法", "艺术与音乐", "文化内涵", "市场影响", "彩蛋、网络梗",
        "发售相关", "游戏背后的中国文化", "角色故事", "游戏攻略", "棍法类型", "天命人法术类型",
    ]
    # aspects = [ "发售相关", "人物和西游记关系", "角色广智", "取景地点", "陕北民谣", "游戏攻略", "棍法类型", "天命人法术类型" ]
    for aspect in tqdm(aspects, total=len(aspects), desc="each aspects"):
        ## Several questions at once, each saved as a single-turn conversation
        if not multi:
            question_number = 10
            messages=[
                    {"role":"system", "content": f"""你是一名提问助手，专注于围绕游戏《黑神话：悟空》的特定方面（例如主要角色、战斗机制或故事情节）提出问题，20个字以内。
                                                根据用户的指示，调整你的提问内容，引导玩家深入思考该方面的背景、动机及其在游戏中的作用和发展。不需要序号，最终返回为一个list，格式为['问题', '问题', ...]"""},
                    {"role": "user", "content": f"请你就《黑神话：悟空》的‘{aspect}’方面提出{question_number}个不同的问题，帮助玩家了解游戏在该方面的信息。"},
                    ]
            text_res = response(messages, temperature=0.7)
            try:
                # Safe literal parsing instead of eval(): the text comes from a model.
                questions = _parse_question_list(text_res)
            except ValueError as e:
                logger.error(f"Got exception {e}, text_res:\n{text_res}")
                raise
            logger.info(f"questions: {questions}")

            # Generate the answers
            for question in tqdm(questions, total=len(questions), desc="generating answer"):
                if simple_response:
                    question += "。请简要回答。"
                answer = generate_answers(questions=question, english=False)

                # Save the result; very short answers are treated as failures
                if len(answer) > 10:
                    conversation = {"conversation":
                            [
                                {
                                    "system": base_system_propmt, ##not used
                                    "input": question,
                                    "output": answer,
                                }
                            ]
                        }

                    save_json_once(conversation, save_path)
        ## One aspect, multi-turn conversation
        else:
            question_number = 1
            iters = random.randint(2, 4)
            conversation = {"conversation":[] }
            question, answer = "", ""
            for i in range(iters):
                messages=[
                        {"role":"system", "content": f"""你现在是一个专门的提问助手，目标是帮助玩家深入探索《黑神话：悟空》这款游戏。你的任务是基于玩家的回答，提出越来越细致和深入的问题。你的提问风格应当简洁、清晰，20个字以内。"""},
                        {"role": "user", "content": f"请你就《黑神话：悟空》的‘{aspect}’方面提出{question_number}个问题，帮助玩家了解游戏在该方面的信息。"},
                        ]
                if i > 0:
                    # Feed the previous round back so the next question can go deeper.
                    messages += [
                                {"role": "assistant", "content": f"{question}"},
                                {"role": "user", "content": f"{answer}"},
                                ]
                question = response(messages, temperature=0.7)
                if not ("黑神话" in question):
                    # Make sure the question names the game.
                    question = "在《黑神话：悟空》中，" + question
                logger.info(f"multi turn, questions:\n{question}")
                if simple_response:
                    question += "。请简要回答。"
                answer = generate_answers(questions=question, english=False)
                logger.info(f"multi turn, answer:\n{answer}")
                if len(answer) > 10:
                    turn = {"input": question, "output": answer}
                    if not conversation["conversation"]:
                        # Only the first saved turn carries the system prompt.
                        turn = {"system": base_system_propmt, **turn}  ##not used
                    conversation["conversation"].append(turn)
            if len(conversation["conversation"]) > 0:
                save_json_once(conversation, save_path)




if __name__ == "__main__":
    # Module-level settings read by generate_rag_data().
    base_system_propmt = "你是悟了悟了，由xzyun2011开发的AI助手，专注于回答和《黑神话：悟空》这款游戏相关的问题，你想帮助玩家了解更多这款游戏背后的故事和文化知识。"
    # English variant of the system prompt (disabled):
    # base_system_propmt = """You are Wulewule, an AI assistant developed by xzyun2011. Your primary focus is to answer questions related to the game "Black Myth: Wukong".  You aim to assist players in learning more about the game's storyline, cultural significance, and background. """
    save_path = "./huixiangdou_conversations.jsonl"

    # One pass per seed; the seeded draw decides between single-turn and
    # multi-turn generation for that pass.
    for run_seed in range(1):
        random.seed(run_seed)
        use_multi_turn = random.random() > 0.5
        generate_rag_data(multi=use_multi_turn, simple_response=True)
        # generate_rag_data(multi=True)