่ฐข็’็’Ÿ commited on
Commit
e9ce3e8
โ€ข
1 Parent(s): c1d41a3
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: CC
3
  emoji: ๐Ÿ“‰
4
  colorFrom: indigo
5
  colorTo: indigo
 
1
  ---
2
+ title: CC_and_Newoptions
3
  emoji: ๐Ÿ“‰
4
  colorFrom: indigo
5
  colorTo: indigo
app.py CHANGED
@@ -1,45 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- import json, os
 
 
3
  import urllib.request
 
 
 
4
 
5
- # Replace 'YOUR_API_KEY' with your actual YouTube Data API key
6
- API_KEY = os.getenv('api_key')
7
 
8
  def get_youtube_id(youtube_url):
9
  if 'youtube.com' in youtube_url:
10
  video_id = youtube_url.split('v=')[-1]
 
11
  elif 'youtu.be' in youtube_url:
12
  video_id = youtube_url.split('/')[-1].split('?')[0]
 
 
13
  return video_id
14
 
15
  def check_cc_license(youtube_url):
16
- # Extract video ID from the URL
17
  video_id = get_youtube_id(youtube_url)
 
 
18
 
19
- # YouTube Data API URL to get video details
20
  api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'
21
 
22
  try:
23
- # Fetch video details
24
  response = urllib.request.urlopen(api_url)
25
  data = json.load(response)
26
 
27
- # Check the license status
28
- for item in data['items']:
 
29
  if item['status']['license'] == 'creativeCommon':
30
- return f"Yes."
31
  else:
32
- return f"No."
 
 
33
 
34
  except Exception as e:
35
  return f"An error occurred: {str(e)}"
36
 
37
- # Gradio interface
38
- interface = gr.Interface(
39
- fn=check_cc_license,
40
- inputs=gr.Textbox(label="YouTube Video URL"),
41
- outputs=gr.Textbox(label="Creative Commons license?")
42
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- if __name__ == "__main__":
45
- interface.launch()
 
1
+ # import gradio as gr
2
+ # import json, os
3
+ # import urllib.request
4
+
5
+ # # Replace 'YOUR_API_KEY' with your actual YouTube Data API key
6
+ # API_KEY = os.getenv('api_key')
7
+
8
+ # def get_youtube_id(youtube_url):
9
+ # if 'youtube.com' in youtube_url:
10
+ # video_id = youtube_url.split('v=')[-1]
11
+ # elif 'youtu.be' in youtube_url:
12
+ # video_id = youtube_url.split('/')[-1].split('?')[0]
13
+ # return video_id
14
+
15
+ # def check_cc_license(youtube_url):
16
+ # # Extract video ID from the URL
17
+ # video_id = get_youtube_id(youtube_url)
18
+
19
+ # # YouTube Data API URL to get video details
20
+ # api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'
21
+
22
+ # try:
23
+ # # Fetch video details
24
+ # response = urllib.request.urlopen(api_url)
25
+ # data = json.load(response)
26
+
27
+ # # Check the license status
28
+ # for item in data['items']:
29
+ # if item['status']['license'] == 'creativeCommon':
30
+ # return f"Yes."
31
+ # else:
32
+ # return f"No."
33
+
34
+ # except Exception as e:
35
+ # return f"An error occurred: {str(e)}"
36
+
37
+ # # Gradio interface
38
+ # interface = gr.Interface(
39
+ # fn=check_cc_license,
40
+ # inputs=gr.Textbox(label="YouTube Video URL"),
41
+ # outputs=gr.Textbox(label="Creative Commons license?")
42
+ # )
43
+
44
+ # if __name__ == "__main__":
45
+ # interface.launch()
46
+
47
+
48
+ # import gradio as gr
49
+ # import asyncio
50
+ # import os
51
+ # from openai import AsyncOpenAI
52
+
53
+ # # ไปŽๆ‚จ็š„ๆจกๅ—ไธญๅฏผๅ…ฅๅฟ…่ฆ็š„ๅ‡ฝๆ•ฐ
54
+ # from utils.generate_distractors import prepare_q_inputs, construct_prompt_textonly, generate_distractors
55
+ # from utils.api_utils import generate_from_openai_chat_completion
56
+ # # ไฟฎๆ”นgenerate_distractorsๅ‡ฝๆ•ฐ๏ผŒไฝฟๅ…ถๆˆไธบๅผ‚ๆญฅๅ‡ฝๆ•ฐ
57
+ # # ๅ‡่ฎพgenerate_distractorsๅ‡ฝๆ•ฐๅฎšไน‰ๅœจๆ‚จ็š„ๆจกๅ—ไธญ๏ผŒๆˆ‘ไปฌ้œ€่ฆไฟฎๆ”นๅฎƒ
58
+ # # ๅฆ‚ๆžœๆ— ๆณ•ไฟฎๆ”นๅŽŸๅง‹ๆจกๅ—๏ผŒ่ฏทๅœจๆญคๅค„้‡ๆ–ฐๅฎšไน‰
59
+
60
+ # async def generate_distractors_async(model_name: str,
61
+ # queries: list,
62
+ # n: int=1,
63
+ # max_tokens: int=4096):
64
+ # assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"
65
+
66
+ # client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://yanlp.zeabur.app/v1")
67
+ # messages = prepare_q_inputs(queries)
68
+
69
+ # # ็›ดๆŽฅ็ญ‰ๅพ…ๅ็จ‹่€Œไธๆ˜ฏไฝฟ็”จasyncio.run()
70
+ # responses = await generate_from_openai_chat_completion(
71
+ # client,
72
+ # messages=messages,
73
+ # engine_name=model_name,
74
+ # n=n,
75
+ # max_tokens=max_tokens,
76
+ # requests_per_minute=30,
77
+ # json_format=True
78
+ # )
79
+
80
+ # for query, response in zip(queries, responses):
81
+ # new_options = response
82
+ # if new_options and "distractors" in new_options:
83
+ # query["option_5"] = new_options["distractors"].get("E", "")
84
+ # query["option_6"] = new_options["distractors"].get("F", "")
85
+ # query["option_7"] = new_options["distractors"].get("G", "")
86
+ # query["distractor_analysis"] = new_options["distractors"].get("analysis_of_distractors", "")
87
+ # else:
88
+ # query["option_5"] = ""
89
+ # query["option_6"] = ""
90
+ # query["option_7"] = ""
91
+ # query["distractor_analysis"] = ""
92
+
93
+ # return queries
94
+
95
+ # # ๅฎšไน‰ๅผ‚ๆญฅๅค„็†ๅ‡ฝๆ•ฐ
96
+ # async def generate_distractors_gradio(question, option1, option2, option3, option4, answer, answer_analysis):
97
+ # query = {
98
+ # 'question': question,
99
+ # 'option_1': option1,
100
+ # 'option_2': option2,
101
+ # 'option_3': option3,
102
+ # 'option_4': option4,
103
+ # 'answer': answer,
104
+ # 'answer_analysis': answer_analysis
105
+ # }
106
+
107
+ # queries = [query] # ๅ› ไธบๅ‡ฝๆ•ฐๆœŸๆœ›็š„ๆ˜ฏไธ€ไธชๅˆ—่กจ
108
+
109
+ # # ่ฐƒ็”จๅผ‚ๆญฅ็”Ÿๆˆๅนฒๆ‰ฐ้กน็š„ๅ‡ฝๆ•ฐ
110
+ # results = await generate_distractors_async(
111
+ # model_name="gpt-4o-mini",
112
+ # queries=queries,
113
+ # n=1,
114
+ # max_tokens=4096
115
+ # )
116
+
117
+ # # ๆๅ–็ป“ๆžœ
118
+ # result = results[0]
119
+ # new_options = {
120
+ # 'E': result.get('option_5', ''),
121
+ # 'F': result.get('option_6', ''),
122
+ # 'G': result.get('option_7', '')
123
+ # }
124
+ # distractor_analysis = result.get('distractor_analysis', '')
125
+
126
+ # # ่ฟ”ๅ›žๆ–ฐ็š„ๅนฒๆ‰ฐ้กนๅ’Œๅˆ†ๆž
127
+ # return new_options, distractor_analysis
128
+
129
+ # # ๅˆ›ๅปบGradio็•Œ้ข
130
+ # with gr.Blocks() as demo:
131
+ # gr.Markdown("# ๅคš้กน้€‰ๆ‹ฉ้ข˜ๅนฒๆ‰ฐ้กน็”Ÿๆˆๅ™จ")
132
+ # with gr.Row():
133
+ # question_input = gr.Textbox(label="้—ฎ้ข˜", lines=2)
134
+ # with gr.Row():
135
+ # option1_input = gr.Textbox(label="้€‰้กนA")
136
+ # option2_input = gr.Textbox(label="้€‰้กนB")
137
+ # with gr.Row():
138
+ # option3_input = gr.Textbox(label="้€‰้กนC")
139
+ # option4_input = gr.Textbox(label="้€‰้กนD")
140
+ # with gr.Row():
141
+ # answer_input = gr.Textbox(label="ๆญฃ็กฎ็ญ”ๆกˆ")
142
+ # with gr.Row():
143
+ # answer_analysis_input = gr.Textbox(label="็ญ”ๆกˆ่งฃๆž", lines=3)
144
+ # with gr.Row():
145
+ # generate_button = gr.Button("็”Ÿๆˆๅนฒๆ‰ฐ้กน")
146
+ # with gr.Row():
147
+ # output_options = gr.JSON(label="็”Ÿๆˆ็š„ๅนฒๆ‰ฐ้€‰้กน")
148
+ # with gr.Row():
149
+ # output_analysis = gr.Textbox(label="ๅนฒๆ‰ฐ้กน่งฃๆž", lines=5)
150
+
151
+ # # ๅฎšไน‰ๆŒ‰้’ฎ็‚นๅ‡ปไบ‹ไปถ๏ผŒๆณจๆ„่ฟ™้‡Œไธ้œ€่ฆไฟฎๆ”น๏ผŒGradioไผš่‡ชๅŠจๅค„็†ๅผ‚ๆญฅๅ‡ฝๆ•ฐ
152
+ # generate_button.click(
153
+ # fn=generate_distractors_gradio,
154
+ # inputs=[question_input, option1_input, option2_input, option3_input, option4_input, answer_input, answer_analysis_input],
155
+ # outputs=[output_options, output_analysis]
156
+ # )
157
+
158
+ # # ่ฟ่กŒGradioๅบ”็”จ
159
+ # demo.launch()
160
+
161
+
162
+
163
+
164
  import gradio as gr
165
+ import asyncio
166
+ import os
167
+ import json
168
  import urllib.request
169
+ from openai import AsyncOpenAI
170
+
171
# Feature 1: check whether a YouTube video carries a Creative Commons license.

# SECURITY: the API key must come from the environment, never from source
# control. A real key was previously committed here — it is exposed in the
# repository history and must be revoked/rotated in the Google Cloud console.
# Set the `api_key` secret/environment variable in the deployment instead.
API_KEY = os.getenv("api_key", "")
175
 
176
def get_youtube_id(youtube_url):
    """Extract the video id from a YouTube URL.

    Handles long-form URLs (youtube.com/watch?v=ID, any parameter order)
    and short-form URLs (youtu.be/ID). Returns '' when the URL is not a
    recognized YouTube URL.
    """
    from urllib.parse import parse_qs, urlparse

    if 'youtube.com' in youtube_url:
        # Parse the query string properly so the id is found regardless of
        # parameter order (naive split('v=') breaks on e.g. ?feature=x&v=ID
        # when other params contain 'v=').
        params = parse_qs(urlparse(youtube_url).query)
        video_id = params.get('v', [''])[0]
        if not video_id:
            # Fall back to the original heuristic for non-standard URLs.
            video_id = youtube_url.split('v=')[-1].split('&')[0]
    elif 'youtu.be' in youtube_url:
        # Short link: the id is the last path segment, before any query.
        video_id = youtube_url.split('/')[-1].split('?')[0]
    else:
        video_id = ''
    return video_id
185
 
186
def check_cc_license(youtube_url):
    """Return "Yes."/"No." depending on whether the video is CC-licensed.

    On bad input, a missing video, or an API failure, returns a
    human-readable message instead (the result feeds a Gradio textbox).
    """
    # Extract the video id from the URL.
    video_id = get_youtube_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL."

    # YouTube Data API v3: only the `status` part is needed for the license.
    api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'

    try:
        # Use a context manager so the HTTP response is always closed
        # (the original leaked the connection on every call).
        with urllib.request.urlopen(api_url) as response:
            data = json.load(response)

        items = data.get('items', [])
        if not items:
            return "Video not found."
        # 'creativeCommon' is the API's literal value for CC-BY videos.
        if items[0]['status']['license'] == 'creativeCommon':
            return "Yes."
        return "No."

    except Exception as e:
        # Broad catch is intentional: any failure becomes a UI message.
        return f"An error occurred: {str(e)}"
212
 
213
+ # ็ฌฌไบŒไธชๅŠŸ่ƒฝ๏ผšไธบๅคš้กน้€‰ๆ‹ฉ้ข˜็”Ÿๆˆๅนฒๆ‰ฐ้กน
214
+
215
+
216
+
217
+ # ไปŽๆ‚จ็š„ๆจกๅ—ไธญๅฏผๅ…ฅๅฟ…่ฆ็š„ๅ‡ฝๆ•ฐ
218
+ from utils.generate_distractors import prepare_q_inputs, construct_prompt_textonly, generate_distractors
219
+ from utils.api_utils import generate_from_openai_chat_completion
220
+ # ไฟฎๆ”นgenerate_distractorsๅ‡ฝๆ•ฐ๏ผŒไฝฟๅ…ถๆˆไธบๅผ‚ๆญฅๅ‡ฝๆ•ฐ
221
+ # ๅ‡่ฎพgenerate_distractorsๅ‡ฝๆ•ฐๅฎšไน‰ๅœจๆ‚จ็š„ๆจกๅ—ไธญ๏ผŒๆˆ‘ไปฌ้œ€่ฆไฟฎๆ”นๅฎƒ
222
+ # ๅฆ‚ๆžœๆ— ๆณ•ไฟฎๆ”นๅŽŸๅง‹ๆจกๅ—๏ผŒ่ฏทๅœจๆญคๅค„้‡ๆ–ฐๅฎšไน‰
223
+
224
async def generate_distractors_async(model_name: str,
                                     queries: list,
                                     n: int = 1,
                                     max_tokens: int = 4096):
    """Asynchronously add distractor options E/F/G (plus analysis) to each query.

    Mutates the query dicts in place and returns the same list.
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs(queries)

    # Await the coroutine directly rather than asyncio.run(): when called
    # from Gradio we are already inside a running event loop.
    responses = await generate_from_openai_chat_completion(
        client,
        messages=messages,
        engine_name=model_name,
        n=n,
        max_tokens=max_tokens,
        requests_per_minute=30,
        json_format=True
    )

    for query, payload in zip(queries, responses):
        if payload and "distractors" in payload:
            generated = payload["distractors"]
            query["option_5"] = generated.get("E", "")
            query["option_6"] = generated.get("F", "")
            query["option_7"] = generated.get("G", "")
            query["distractor_analysis"] = generated.get("analysis_of_distractors", "")
        else:
            # Failed or malformed response: blank placeholders.
            query["option_5"] = ""
            query["option_6"] = ""
            query["option_7"] = ""
            query["distractor_analysis"] = ""

    return queries
258
+
259
+ # ๅฎšไน‰ๅผ‚ๆญฅๅค„็†ๅ‡ฝๆ•ฐ
260
async def generate_distractors_gradio(question, option1, option2, option3, option4, answer, answer_analysis):
    """Gradio handler: wrap the form fields into a query dict and return
    (new distractor options, distractor analysis)."""
    queries = [{
        'question': question,
        'option_1': option1,
        'option_2': option2,
        'option_3': option3,
        'option_4': option4,
        'answer': answer,
        'answer_analysis': answer_analysis,
    }]  # the generator expects a list of query dicts

    enriched = await generate_distractors_async(
        model_name="gpt-4o-mini",
        queries=queries,
        n=1,
        max_tokens=4096
    )

    # Only one query was submitted; unpack its generated fields.
    first = enriched[0]
    new_options = {
        'E': first.get('option_5', ''),
        'F': first.get('option_6', ''),
        'G': first.get('option_7', ''),
    }
    return new_options, first.get('distractor_analysis', '')
292
+
293
+
294
# Top-level Gradio app: one tab per feature (license checker, distractor
# generator). UI strings are user-facing and kept verbatim.
with gr.Blocks() as demo:
    gr.Markdown("# ๅคšๅŠŸ่ƒฝGradioๅบ”็”จ")

    with gr.Tabs():
        with gr.TabItem("YouTube Creative Commonsๆฃ€ๆŸฅๅ™จ"):
            gr.Markdown("## ๆฃ€ๆŸฅYouTube่ง†้ข‘ๆ˜ฏๅฆๅ…ทๆœ‰Creative Commons่ฎธๅฏ่ฏ")
            youtube_url_input = gr.Textbox(label="YouTube่ง†้ข‘URL")
            cc_license_output = gr.Textbox(label="ๆ˜ฏๅฆไธบCreative Commons่ฎธๅฏ่ฏ๏ผŸ")
            check_button = gr.Button("ๆฃ€ๆŸฅ่ฎธๅฏ่ฏ")
            check_button.click(
                fn=check_cc_license,
                inputs=youtube_url_input,
                outputs=cc_license_output
            )
        with gr.TabItem("ๅคš้กน้€‰ๆ‹ฉ้ข˜ๅนฒๆ‰ฐ้กน็”Ÿๆˆๅ™จ"):
            gr.Markdown("## ไธบๅคš้กน้€‰ๆ‹ฉ้ข˜็”Ÿๆˆๅนฒๆ‰ฐ้กน")
            with gr.Row():
                question_input = gr.Textbox(label="้—ฎ้ข˜", lines=2)
            with gr.Row():
                option1_input = gr.Textbox(label="้€‰้กนA")
                option2_input = gr.Textbox(label="้€‰้กนB")
            with gr.Row():
                option3_input = gr.Textbox(label="้€‰้กนC")
                option4_input = gr.Textbox(label="้€‰้กนD")
            with gr.Row():
                answer_input = gr.Textbox(label="ๆญฃ็กฎ็ญ”ๆกˆ")
            with gr.Row():
                answer_analysis_input = gr.Textbox(label="็ญ”ๆกˆ่งฃๆž", lines=3)
            generate_button = gr.Button("็”Ÿๆˆๅนฒๆ‰ฐ้กน")
            output_options = gr.JSON(label="็”Ÿๆˆ็š„ๅนฒๆ‰ฐ้€‰้กน")
            output_analysis = gr.Textbox(label="ๅนฒๆ‰ฐ้กน่งฃๆž", lines=5)
            # Gradio handles the async handler transparently.
            generate_button.click(
                fn=generate_distractors_gradio,
                inputs=[question_input, option1_input, option2_input, option3_input, option4_input, answer_input, answer_analysis_input],
                outputs=[output_options, output_analysis]
            )

# Launch only when executed as a script, so importing app.py has no side
# effects. HF Spaces runs app.py as __main__, so Spaces behavior is unchanged.
if __name__ == "__main__":
    demo.launch()
333
 
 
 
utils/__pycache__/api_utils.cpython-310.pyc ADDED
Binary file (4.81 kB). View file
 
utils/__pycache__/generate_distractors.cpython-310.pyc ADDED
Binary file (4.4 kB). View file
 
utils/__pycache__/generate_translation.cpython-310.pyc ADDED
Binary file (2.85 kB). View file
 
utils/__pycache__/prompt.cpython-310.pyc ADDED
Binary file (3.59 kB). View file
 
utils/__pycache__/prompt.cpython-311.pyc ADDED
Binary file (5.88 kB). View file
 
utils/__pycache__/prompt.cpython-38.pyc ADDED
Binary file (3.59 kB). View file
 
utils/__pycache__/turkle.cpython-310.pyc ADDED
Binary file (3.3 kB). View file
 
utils/api_utils.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import numpy as np
3
+ from typing import Dict
4
+ import random
5
+
6
+ import asyncio
7
+ import logging
8
+ import os, json
9
+ from typing import Any
10
+ from aiohttp import ClientSession
11
+ from tqdm.asyncio import tqdm_asyncio
12
+ import random
13
+ from time import sleep
14
+
15
+ import aiolimiter
16
+
17
+ import openai
18
+ from openai import AsyncOpenAI, OpenAIError
19
+ from anthropic import AsyncAnthropic
20
+
21
async def _throttled_openai_chat_completion_acreate(
    client: AsyncOpenAI,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
    json_format: bool = False,
    n: int = 1,
):
    """Rate-limited chat-completion call with retry on rate limiting.

    Returns the API response object, or None on a bad request, a
    non-rate-limit OpenAI error, or after exhausting all retries.
    """
    # Build the kwargs once so the JSON / non-JSON paths share a single
    # call site (the original duplicated the entire create() invocation).
    kwargs = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        n=n,
    )
    if json_format:
        kwargs["response_format"] = {"type": "json_object"}

    async with limiter:
        for _ in range(10):
            try:
                return await client.chat.completions.create(**kwargs)
            except openai.RateLimitError:
                print("Rate limit exceeded, retrying...")
                # NOTE(review): time.sleep blocks the event loop; kept for
                # behavioral parity — consider asyncio.sleep in a follow-up.
                sleep(random.randint(10, 20))
            except openai.BadRequestError as e:
                # Malformed request: retrying cannot help.
                print(e)
                return None
            except OpenAIError as e:
                print(e)
                sleep(random.randint(5, 10))
                return None
        return None  # all retries exhausted
+
65
async def generate_from_openai_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    json_format: bool = False,
    n: int = 1,
):
    """Fan out chat-completion requests under a rate limit and collect outputs.

    Returns one entry per input message: a parsed dict (json_format) or a
    string; when n > 1, a list of n such values. Failed requests yield empty
    placeholders instead of raising.
    """
    # One request per `delay` seconds overall.
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    async_responses = [
        _throttled_openai_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
            json_format=json_format,
            n=n,
        )
        for message in messages
    ]

    responses = await tqdm_asyncio.gather(*async_responses)

    def _empty_payload():
        # Fresh dict per failure: the original appended ONE shared dict for
        # every failed request, so a caller mutating one placeholder would
        # silently mutate them all.
        return {
            "question": "",
            "options": {"A": "", "B": "", "C": "", "D": ""},
            "distractors": {"E": "", "F": "", "G": ""},
            "correct_answer": "",
        }

    def _content(resp, i=0):
        # Safely extract choices[i].message.content; None when anything in
        # the chain is missing/empty.
        if resp and resp.choices[i] and resp.choices[i].message and resp.choices[i].message.content:
            return resp.choices[i].message.content
        return None

    outputs = []
    for response in responses:
        if n == 1:
            content = _content(response)
            if json_format:
                outputs.append(json.loads(content) if content else _empty_payload())
            else:
                outputs.append(content if content else "")
        else:
            batch = []
            for i in range(n):
                content = _content(response, i)
                if json_format:
                    batch.append(json.loads(content) if content else _empty_payload())
                else:
                    batch.append(content if content else "")
            outputs.append(batch)
    return outputs
+
138
async def _throttled_claude_chat_completion_acreate(
    client: AsyncAnthropic,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
):
    """Rate-limited single Claude message call; returns None on API error."""
    async with limiter:
        try:
            return await client.messages.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
        except Exception as e:
            # Narrowed from a bare `except:` so asyncio.CancelledError and
            # KeyboardInterrupt propagate instead of being swallowed; log the
            # failure rather than dropping it silently.
            print(e)
            return None
+
159
async def generate_from_claude_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    n: int = 1,
):
    """Fan out Claude requests (n independent samples per message) under a
    rate limit. Returns one string per message, or a list of n strings each
    when n > 1; failures yield "".
    """
    limiter = aiolimiter.AsyncLimiter(1, 60.0 / requests_per_minute)

    # Duplicate every prompt n times so each copy is sampled independently.
    n_messages = [message for message in messages for _ in range(n)]

    tasks = [
        _throttled_claude_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
        )
        for message in n_messages
    ]

    responses = await tqdm_asyncio.gather(*tasks)

    def _text(resp):
        # First content block's text, or "" when the call failed.
        if resp and resp.content and resp.content[0] and resp.content[0].text:
            return resp.content[0].text
        return ""

    if n == 1:
        return [_text(resp) for resp in responses]

    # Re-group the flat response list back into chunks of n per message.
    grouped = []
    for idx, resp in enumerate(responses):
        if idx % n == 0:
            grouped.append([])
        grouped[-1].append(_text(resp))
    return grouped
utils/generate_distractors.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from tqdm import tqdm
4
+ import os
5
+ import asyncio
6
+ from openai import AsyncOpenAI
7
+
8
+ from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
9
+
10
+
11
def construct_prompt_textonly(question: str, options: list, answer: str, answer_analysis: str) -> str:
    """Build the distractor-generation prompt for one multiple-choice question.

    Args:
        question: the question stem.
        options: the four existing options (A-D), in order.
        answer: the correct answer label.
        answer_analysis: explanation of why the answer is correct.

    Returns:
        A prompt whose expected model output is a JSON object containing the
        original options plus three new distractors E/F/G and an analysis.
    """
    optionized_list = [f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
    optionized_str = "\n".join(optionized_list)

    # FIX: the example JSON previously contained trailing commas (after
    # "analysis_of_distractors" and "correct_answer") while simultaneously
    # instructing the model to avoid extra commas — the template itself was
    # invalid JSON. The trailing commas are removed below.
    prompt = f"""
Generate a multiple-choice question with additional distractors that increase the complexity of answer selection. Follow these instructions:
1. **Retain Original Structure**: Retain the original question and options.
2. **Add Three Distractors**: Add three new distractors that are **plausible and maintain professional validity**. These should increase the difficulty but still be incorrect, based on the original question and answer analysis.
3. **Use Answer Analysis**: Reference the **correct answer analysis** when creating distractors to ensure they challenge **subject-matter experts**.
4. **Expert-Level Difficulty**: Keep the distractors **challenging and hard to distinguish** from the correct answer, requiring **advanced knowledge** to avoid the correct answer being too obvious.
5. **Balanced Length**: Ensure all options have **similar lengths** to prevent any one option from standing out.
6. **Distractors Analysis**: Provide a **distractor analysis in Chinese**, explaining why the distractors are **incorrect** but **challenging and hard to distinguish**.

Please output the result in valid JSON format using the structure below. Make sure there are no extra commas, missing commas, extra quotation marks or missing quotation marks:
{{
    "question": "{question}",
    "options": {{
        "A": "{options[0]}",
        "B": "{options[1]}",
        "C": "{options[2]}",
        "D": "{options[3]}"
    }},
    "distractors": {{
        "E": "New distractor 1",
        "F": "New distractor 2",
        "G": "New distractor 3",
        "analysis_of_distractors": "Use Chinese to explain why the distractors are **incorrect** but **challenging and hard to distinguish**, based on the question, options, and answer analysis."
    }},
    "correct_answer": "{answer}"
}}

Input:
Question: {question}
Options:
{optionized_str}
Answer: {answer}
Answer Analysis: {answer_analysis}
"""

    return prompt
+
53
+
54
def prepare_q_text_input(query, prompt_func=construct_prompt_textonly):
    """Build the distractor-generation prompt for a single query dict."""
    # The four original options are stored under option_1..option_4.
    option_values = [query[f'option_{k}'] for k in range(1, 5)]
    return prompt_func(
        question=query['question'],
        options=option_values,
        answer=query['answer'],
        answer_analysis=query['answer_analysis'],
    )
+
63
+
64
def prepare_q_inputs(queries):
    """Wrap each query's prompt into a single-turn chat message list."""
    return [
        [{"role": "user", "content": prepare_q_text_input(query)}]
        for query in queries
    ]
+
79
+
80
+
81
+ # def extract_json_from_text(text):
82
+ # text = json.dumps(text)
83
+ # # ็งป้™ค่ฝฌไน‰็ฌฆๅ’Œๆข่กŒ็ฌฆ
84
+ # text = text.replace('\\n', '').replace('\\"', '"')
85
+
86
+ # # ๅฎšไน‰ๅŒน้… JSON ๅฏน่ฑก็š„ๆญฃๅˆ™่กจ่พพๅผๆจกๅผ
87
+ # json_pattern = re.compile(
88
+ # r'\{\s*"question":\s*"([^"]*)",\s*"options":\s*\{\s*"A":\s*"([^"]*)",\s*"B":\s*"([^"]*)",\s*"C":\s*"([^"]*)",\s*"D":\s*"([^"]*)"\s*\},'
89
+ # r'\s*"distractors":\s*\{\s*"E":\s*"([^"]*)",\s*"F":\s*"([^"]*)",\s*"G":\s*"([^"]*)"\s*\},\s*"correct_answer":\s*"([^"]*)"\s*\}',
90
+ # re.DOTALL
91
+ # )
92
+
93
+ # # ๅŒน้… JSON ็ป“ๆž„
94
+ # match = json_pattern.search(text)
95
+
96
+ # if match:
97
+ # # ๆ•่Žทๅˆฐ็š„ๅŒน้…็ป„
98
+ # question = match.group(1)
99
+ # option_a = match.group(2)
100
+ # option_b = match.group(3)
101
+ # option_c = match.group(4)
102
+ # option_d = match.group(5)
103
+ # distractor_e = match.group(6)
104
+ # distractor_f = match.group(7)
105
+ # distractor_g = match.group(8)
106
+ # correct_answer = match.group(9)
107
+
108
+ # # ๆž„ๅปบ JSON ๅฏน่ฑก
109
+ # json_data = {
110
+ # "question": question,
111
+ # "options": {
112
+ # "A": option_a,
113
+ # "B": option_b,
114
+ # "C": option_c,
115
+ # "D": option_d
116
+ # },
117
+ # "distractors": {
118
+ # "E": distractor_e,
119
+ # "F": distractor_f,
120
+ # "G": distractor_g
121
+ # },
122
+ # "correct_answer": correct_answer
123
+ # }
124
+
125
+ # return json_data
126
+ # else:
127
+ # print("No JSON object found in the text.")
128
+ # return None
129
+
130
+
131
def generate_distractors(model_name: str,
                         queries: list,
                         n: int = 1,
                         max_tokens: int = 4096):
    """Synchronously enrich each query dict with three generated distractors.

    Mutates the query dicts in place (option_5..option_7 and
    distractor_analysis) and returns the same list. Failed responses leave
    empty strings.

    Note: uses asyncio.run(), so it must NOT be called from inside a running
    event loop (use the async variant from Gradio handlers).
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs(queries)

    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n=n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )

    for query, new_options in zip(queries, responses):
        # One membership check instead of the original's four identical
        # `"distractors" in new_options` tests (one per field).
        distractors = new_options.get("distractors", {}) if new_options else {}
        query["option_5"] = distractors.get("E", "")
        query["option_6"] = distractors.get("F", "")
        query["option_7"] = distractors.get("G", "")
        query["distractor_analysis"] = distractors.get("analysis_of_distractors", "")

    return queries
+
175
+
176
+
177
+
178
+
179
+
utils/generate_translation.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from tqdm import tqdm
4
+ import os
5
+ import asyncio
6
+ from openai import AsyncOpenAI
7
+
8
+ from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
9
+
10
+
11
def construct_translate_prompt_textonly(question: str, options: list, text_only_analysis: str) -> str:
    """Build a prompt asking the model to translate a QA block into Chinese.

    Args:
        question: the question stem.
        options: all option texts (labelled A, B, C, ... in order).
        text_only_analysis: the analysis text to translate.

    Returns:
        A prompt whose expected model output is a JSON object with keys
        "QA" and "ToA".
    """
    optionized_list = [f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
    QA_str = question + "\n" + "\n".join(optionized_list)

    # FIX: the example JSON previously read `"ToA" "The translation..."` —
    # the colon was missing and there was a trailing comma, making the
    # template itself invalid JSON while instructing the model to emit
    # valid JSON. Both are corrected below.
    prompt = f"""
Please translate the following inputs into Chinese, ensuring they maintain a professional tone. If the input is empty, return an empty string.

Output the result in valid JSON format using the structure provided below. Be careful to avoid extra commas or missing quotation marks:
{{
    "QA": "The translation of QA str",
    "ToA": "The translation of text_only_analysis."
}}

Input:
QA: {QA_str}
text_only_analysis: {text_only_analysis}
"""

    return prompt
31
+
32
+
33
def prepare_q_text_input_translation(query, prompt_func=construct_translate_prompt_textonly):
    """Build the translation prompt for a single query dict (7 options)."""
    # All seven options (four originals plus generated E/F/G) are stored
    # under option_1..option_7.
    option_values = [query[f'option_{k}'] for k in range(1, 8)]
    return prompt_func(
        question=query['question'],
        options=option_values,
        text_only_analysis=query['text_only_example_response'],
    )
40
+
41
+
42
def prepare_q_inputs_translation(queries):
    """Wrap each query's translation prompt into a single-turn chat message list."""
    return [
        [{"role": "user", "content": prepare_q_text_input_translation(query)}]
        for query in queries
    ]
56
+
57
+
58
def generate_translation(model_name: str,
                         queries: list,
                         n: int = 1,
                         max_tokens: int = 2048):
    """Synchronously add Chinese translations to each query dict.

    Mutates the query dicts in place (QA_translation and
    text_only_example_response_translation) and returns the same list.
    Failed responses leave empty strings.

    Note: uses asyncio.run(), so it must NOT be called from inside a running
    event loop.
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs_translation(queries)

    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n=n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )

    for query, new_options in zip(queries, responses):
        # One None-check instead of the original's two identical ones.
        payload = new_options or {}
        query["QA_translation"] = payload.get("QA", "")
        query["text_only_example_response_translation"] = payload.get("ToA", "")

    return queries
93
+
94
+
95
+
96
+
97
+
98
+