CC_and_Distractors

Sleeping

App Files Files Community

谢璐璟 committed on Sep 18, 2024

Commit

8391956

1 Parent(s): bee8e94

sync

Browse files

Files changed (4) hide show

utils/__pycache__/api_utils.cpython-310.pyc +0 -0
utils/__pycache__/generate_distractors.cpython-310.pyc +0 -0
utils/api_utils.py +47 -132
utils/generate_distractors.py +18 -78

utils/__pycache__/api_utils.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/api_utils.cpython-310.pyc and b/utils/__pycache__/api_utils.cpython-310.pyc differ

utils/__pycache__/generate_distractors.cpython-310.pyc CHANGED Viewed

Binary files a/utils/__pycache__/generate_distractors.cpython-310.pyc and b/utils/__pycache__/generate_distractors.cpython-310.pyc differ

utils/api_utils.py CHANGED Viewed

@@ -1,68 +1,59 @@
 import base64
 import numpy as np
-from typing import Dict
 import random
-import asyncio
 import logging
-import os, json
-from typing import Any
-from aiohttp import ClientSession
-from tqdm.asyncio import tqdm_asyncio
-import random
 from time import sleep
-import aiolimiter
-import openai
-from openai import AsyncOpenAI, OpenAIError
-from anthropic import AsyncAnthropic
-async def _throttled_openai_chat_completion_acreate(
-    client: AsyncOpenAI,
     model: str,
     messages,
     temperature: float,
     max_tokens: int,
     top_p: float,
-    limiter: aiolimiter.AsyncLimiter,
     json_format: bool = False,
     n: int = 1,
 ):
-    async with limiter:
-        for _ in range(10):
-            try:
-                if json_format:
-                    return await client.chat.completions.create(
-                        model=model,
-                        messages=messages,
-                        temperature=temperature,
-                        max_tokens=max_tokens,
-                        top_p=top_p,
-                        n=n,
-                        response_format={"type": "json_object"},
-                    )
-                else:
-                    return await client.chat.completions.create(
-                        model=model,
-                        messages=messages,
-                        temperature=temperature,
-                        max_tokens=max_tokens,
-                        top_p=top_p,
-                        n=n,
-                    )
-            except openai.RateLimitError as e:
-                print("Rate limit exceeded, retrying...")
-                sleep(random.randint(10, 20))  # 增加重试等待时间
-            except openai.BadRequestError as e:
-                print(e)
-                return None
-            except OpenAIError as e:
-                print(e)
-                sleep(random.randint(5, 10))
-        return None
-async def generate_from_openai_chat_completion(
     client,
     messages,
     engine_name: str,
@@ -73,26 +64,24 @@ async def generate_from_openai_chat_completion(
     json_format: bool = False,
     n: int = 1,
 ):
-    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
     delay = 60.0 / requests_per_minute
-    limiter = aiolimiter.AsyncLimiter(1, delay)
-    async_responses = [
-        _throttled_openai_chat_completion_acreate(
             client,
             model=engine_name,
             messages=message,
             temperature=temperature,
             max_tokens=max_tokens,
             top_p=top_p,
-            limiter=limiter,
             json_format=json_format,
             n=n,
         )
         for message in messages
     ]
-    responses = await tqdm_asyncio.gather(*async_responses)
     empty_dict = {
         "question": "",
         "options": {
@@ -107,7 +96,7 @@ async def generate_from_openai_chat_completion(
             "G": "",
         },
         "correct_answer": ""
-        }
     empty_str = ""
     outputs = []
     for response in responses:
@@ -135,77 +124,3 @@ async def generate_from_openai_chat_completion(
                 ])
     return outputs
-async def _throttled_claude_chat_completion_acreate(
-    client: AsyncAnthropic,
-    model: str,
-    messages,
-    temperature: float,
-    max_tokens: int,
-    top_p: float,
-    limiter: aiolimiter.AsyncLimiter,
-):
-    async with limiter:
-        try:
-            return await client.messages.create(
-                model=model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                top_p=top_p,
-            )
-        except:
-            return None
-async def generate_from_claude_chat_completion(
-    client,
-    messages,
-    engine_name: str,
-    temperature: float = 1.0,
-    max_tokens: int = 512,
-    top_p: float = 1.0,
-    requests_per_minute: int = 100,
-    n: int = 1,
-):
-    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
-    delay = 60.0 / requests_per_minute
-    limiter = aiolimiter.AsyncLimiter(1, delay)
-    n_messages = []
-    for message in messages:
-        for _ in range(n):
-            n_messages.append(message)
-    async_responses = [
-        _throttled_claude_chat_completion_acreate(
-            client,
-            model=engine_name,
-            messages=message,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            top_p=top_p,
-            limiter=limiter,
-        )
-        for message in n_messages
-    ]
-    responses = await tqdm_asyncio.gather(*async_responses)
-    outputs = []
-    if n == 1:
-        for response in responses:
-            if response and response.content and response.content[0] and response.content[0].text:
-                outputs.append(response.content[0].text)
-            else:
-                outputs.append("")
-    else:
-        idx = 0
-        for response in responses:
-            if idx % n == 0:
-                outputs.append([])
-            idx += 1
-            if response and response.content and response.content[0] and response.content[0].text:
-                outputs[-1].append(response.content[0].text)
-            else:
-                outputs[-1].append("")
-    return outputs

 import base64
 import numpy as np
 import random
 import logging
+import os
+import json
+import openai
+from openai import OpenAIError
 from time import sleep
+def _throttled_openai_chat_completion_create(
+    client,
     model: str,
     messages,
     temperature: float,
     max_tokens: int,
     top_p: float,
     json_format: bool = False,
     n: int = 1,
 ):
+    """Call the chat-completions endpoint synchronously, retrying on errors.
+
+    Makes up to 10 attempts. A rate-limit error waits 10-20 seconds and
+    retries; any other OpenAIError is printed, followed by a 5-10 second
+    wait and a retry; a bad-request error is printed and ends the attempts
+    immediately.
+
+    When json_format is true the request additionally passes
+    response_format={"type": "json_object"}.
+
+    NOTE: despite the name, this function does no throttling of its own;
+    it only retries.
+
+    Returns the object returned by client.chat.completions.create, or None
+    if the request was rejected as bad or all attempts failed.
+    """
+    for _ in range(10):  # up to 10 attempts
+        try:
+            if json_format:
+                return client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    top_p=top_p,
+                    n=n,
+                    response_format={"type": "json_object"},
+                )
+            else:
+                return client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    top_p=top_p,
+                    n=n,
+                )
+        except openai.RateLimitError as e:
+            print("Rate limit exceeded, retrying...")
+            sleep(random.randint(10, 20))  # longer wait before retrying after a rate limit
+        except openai.BadRequestError as e:
+            print(e)
+            return None
+        except OpenAIError as e:
+            print(e)
+            sleep(random.randint(5, 10))
+    return None
+def generate_from_openai_chat_completion(
     client,
     messages,
     engine_name: str,
     json_format: bool = False,
     n: int = 1,
 ):
+    """同步生成OpenAI聊天补全"""
     delay = 60.0 / requests_per_minute
+    sleep(delay)  # 简单的限流处理
+    responses = [
+        _throttled_openai_chat_completion_create(
             client,
             model=engine_name,
             messages=message,
             temperature=temperature,
             max_tokens=max_tokens,
             top_p=top_p,
             json_format=json_format,
             n=n,
         )
         for message in messages
     ]
     empty_dict = {
         "question": "",
         "options": {
             "G": "",
         },
         "correct_answer": ""
+    }
     empty_str = ""
     outputs = []
     for response in responses:
                 ])
     return outputs

utils/generate_distractors.py CHANGED Viewed

@@ -2,10 +2,9 @@ import json
 import re
 from tqdm import tqdm
 import os
-import asyncio
-from openai import AsyncOpenAI
-from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
 def construct_prompt_textonly(question: str, options: list, answer: str, answer_analysis: str) -> str:
@@ -19,7 +18,7 @@ Generate a multiple-choice question with additional distractors that increase th
 3. **Use Answer Analysis**: Reference the **correct answer analysis** when creating distractors to ensure they challenge **subject-matter experts**.
 4. **Expert-Level Difficulty**: Keep the distractors **challenging and hard to distinguish** from the correct answer, requiring **advanced knowledge** to avoid the correct answer being too obvious.
 5. **Balanced Length**: Ensure all options have **similar lengths** to prevent any one option from standing out.
-6. **Distractors Analysis**: Provide a **distractor analysis in Chinese**, explaining why the distractors are **incorrect** but **challenging and hard to distinguish**.
 Please output the result in valid JSON format using the structure below. Make sure there are no extra commas, missing commas, extra quotation marks or missing quotation marks:
 {{
@@ -47,7 +46,6 @@ Answer: {answer}
 Answer Analysis: {answer_analysis}
 """
-    # prompt = prompt.replace("I don't know.", "Idle.")
     return prompt
@@ -75,84 +73,32 @@ def prepare_q_inputs(queries):
         messages.append(prompt_message)
     return messages
-# def extract_json_from_text(text):
-#     text = json.dumps(text)
-#     # 移除转义符和换行符
-#     text = text.replace('\\n', '').replace('\\"', '"')
-#     # 定义匹配 JSON 对象的正则表达式模式
-#     json_pattern = re.compile(
-#         r'\{\s*"question":\s*"([^"]*)",\s*"options":\s*\{\s*"A":\s*"([^"]*)",\s*"B":\s*"([^"]*)",\s*"C":\s*"([^"]*)",\s*"D":\s*"([^"]*)"\s*\},'
-#         r'\s*"distractors":\s*\{\s*"E":\s*"([^"]*)",\s*"F":\s*"([^"]*)",\s*"G":\s*"([^"]*)"\s*\},\s*"correct_answer":\s*"([^"]*)"\s*\}',
-#         re.DOTALL
-#     )
-#     # 匹配 JSON 结构
-#     match = json_pattern.search(text)
-#     if match:
-#         # 捕获到的匹配组
-#         question = match.group(1)
-#         option_a = match.group(2)
-#         option_b = match.group(3)
-#         option_c = match.group(4)
-#         option_d = match.group(5)
-#         distractor_e = match.group(6)
-#         distractor_f = match.group(7)
-#         distractor_g = match.group(8)
-#         correct_answer = match.group(9)
-#         # 构建 JSON 对象
-#         json_data = {
-#             "question": question,
-#             "options": {
-#                 "A": option_a,
-#                 "B": option_b,
-#                 "C": option_c,
-#                 "D": option_d
-#             },
-#             "distractors": {
-#                 "E": distractor_e,
-#                 "F": distractor_f,
-#                 "G": distractor_g
-#             },
-#             "correct_answer": correct_answer
-#         }
-#         return json_data
-#     else:
-#         print("No JSON object found in the text.")
-#         return None
 def generate_distractors(model_name: str,
-                      queries: list,
-                      n: int=1,
-                      max_tokens: int=4096):
     assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"
-    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),base_url="https://yanlp.zeabur.app/v1")
     messages = prepare_q_inputs(queries)
-    responses = asyncio.run(
-        generate_from_openai_chat_completion(
-            client,
-            messages=messages,
-            engine_name=model_name,
-            n = n,
-            max_tokens=max_tokens,
-            requests_per_minute=30,
-            json_format=True
-        )
     )
     for query, response in zip(queries, responses):
         new_options = response
-        # print(new_options)
         if new_options and "distractors" in new_options:
             query["option_5"] = new_options["distractors"].get("E", "")
         else:
@@ -170,10 +116,4 @@ def generate_distractors(model_name: str,
         else:
             query["distractor_analysis"] = ""
-    return queries

 import re
 from tqdm import tqdm
 import os
+from openai import OpenAI  # 替换 AsyncOpenAI
+from utils.api_utils import generate_from_openai_chat_completion
 def construct_prompt_textonly(question: str, options: list, answer: str, answer_analysis: str) -> str:
 3. **Use Answer Analysis**: Reference the **correct answer analysis** when creating distractors to ensure they challenge **subject-matter experts**.
 4. **Expert-Level Difficulty**: Keep the distractors **challenging and hard to distinguish** from the correct answer, requiring **advanced knowledge** to avoid the correct answer being too obvious.
 5. **Balanced Length**: Ensure all options have **similar lengths** to prevent any one option from standing out.
+6. **Distractors Analysis**: Provide a **distractor analysis in Chinese**, explaining why the distractors are **incorrect** but **challenging and hard to distinguish**, based on the question, options, and answer analysis.
 Please output the result in valid JSON format using the structure below. Make sure there are no extra commas, missing commas, extra quotation marks or missing quotation marks:
 {{
 Answer Analysis: {answer_analysis}
 """
     return prompt
         messages.append(prompt_message)
     return messages
 def generate_distractors(model_name: str,
+                              queries: list,
+                              n: int=1,
+                              max_tokens: int=4096):
     assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"
+    # 改用同步版本的 OpenAI 客户端
+    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://yanlp.zeabur.app/v1")
     messages = prepare_q_inputs(queries)
+    # Call the synchronous `generate_from_openai_chat_completion` directly
+    responses = generate_from_openai_chat_completion(
+        client,
+        messages=messages,
+        engine_name=model_name,
+        n=n,
+        max_tokens=max_tokens,
+        requests_per_minute=30,
+        json_format=True
     )
     for query, response in zip(queries, responses):
         new_options = response
         if new_options and "distractors" in new_options:
             query["option_5"] = new_options["distractors"].get("E", "")
         else:
         else:
             query["distractor_analysis"] = ""
+    return queries