Spaces:
Sleeping
Sleeping
่ฐข็็
commited on
Commit
โข
e9ce3e8
1
Parent(s):
c1d41a3
- README.md +1 -1
- app.py +306 -18
- utils/__pycache__/api_utils.cpython-310.pyc +0 -0
- utils/__pycache__/generate_distractors.cpython-310.pyc +0 -0
- utils/__pycache__/generate_translation.cpython-310.pyc +0 -0
- utils/__pycache__/prompt.cpython-310.pyc +0 -0
- utils/__pycache__/prompt.cpython-311.pyc +0 -0
- utils/__pycache__/prompt.cpython-38.pyc +0 -0
- utils/__pycache__/turkle.cpython-310.pyc +0 -0
- utils/api_utils.py +211 -0
- utils/generate_distractors.py +179 -0
- utils/generate_translation.py +98 -0
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: ๐
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
|
|
1 |
---
|
2 |
+
title: CC_and_Newoptions
|
3 |
emoji: ๐
|
4 |
colorFrom: indigo
|
5 |
colorTo: indigo
|
app.py
CHANGED
@@ -1,45 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
-
import
|
|
|
|
|
3 |
import urllib.request
|
|
|
|
|
|
|
4 |
|
5 |
-
#
|
6 |
-
API_KEY =
|
7 |
|
8 |
def get_youtube_id(youtube_url):
|
9 |
if 'youtube.com' in youtube_url:
|
10 |
video_id = youtube_url.split('v=')[-1]
|
|
|
11 |
elif 'youtu.be' in youtube_url:
|
12 |
video_id = youtube_url.split('/')[-1].split('?')[0]
|
|
|
|
|
13 |
return video_id
|
14 |
|
15 |
def check_cc_license(youtube_url):
|
16 |
-
#
|
17 |
video_id = get_youtube_id(youtube_url)
|
|
|
|
|
18 |
|
19 |
-
# YouTube Data API URL
|
20 |
api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'
|
21 |
|
22 |
try:
|
23 |
-
#
|
24 |
response = urllib.request.urlopen(api_url)
|
25 |
data = json.load(response)
|
26 |
|
27 |
-
#
|
28 |
-
|
|
|
29 |
if item['status']['license'] == 'creativeCommon':
|
30 |
-
return
|
31 |
else:
|
32 |
-
return
|
|
|
|
|
33 |
|
34 |
except Exception as e:
|
35 |
return f"An error occurred: {str(e)}"
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
if __name__ == "__main__":
|
45 |
-
interface.launch()
|
|
|
1 |
+
# import gradio as gr
|
2 |
+
# import json, os
|
3 |
+
# import urllib.request
|
4 |
+
|
5 |
+
# # Replace 'YOUR_API_KEY' with your actual YouTube Data API key
|
6 |
+
# API_KEY = os.getenv('api_key')
|
7 |
+
|
8 |
+
# def get_youtube_id(youtube_url):
|
9 |
+
# if 'youtube.com' in youtube_url:
|
10 |
+
# video_id = youtube_url.split('v=')[-1]
|
11 |
+
# elif 'youtu.be' in youtube_url:
|
12 |
+
# video_id = youtube_url.split('/')[-1].split('?')[0]
|
13 |
+
# return video_id
|
14 |
+
|
15 |
+
# def check_cc_license(youtube_url):
|
16 |
+
# # Extract video ID from the URL
|
17 |
+
# video_id = get_youtube_id(youtube_url)
|
18 |
+
|
19 |
+
# # YouTube Data API URL to get video details
|
20 |
+
# api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'
|
21 |
+
|
22 |
+
# try:
|
23 |
+
# # Fetch video details
|
24 |
+
# response = urllib.request.urlopen(api_url)
|
25 |
+
# data = json.load(response)
|
26 |
+
|
27 |
+
# # Check the license status
|
28 |
+
# for item in data['items']:
|
29 |
+
# if item['status']['license'] == 'creativeCommon':
|
30 |
+
# return f"Yes."
|
31 |
+
# else:
|
32 |
+
# return f"No."
|
33 |
+
|
34 |
+
# except Exception as e:
|
35 |
+
# return f"An error occurred: {str(e)}"
|
36 |
+
|
37 |
+
# # Gradio interface
|
38 |
+
# interface = gr.Interface(
|
39 |
+
# fn=check_cc_license,
|
40 |
+
# inputs=gr.Textbox(label="YouTube Video URL"),
|
41 |
+
# outputs=gr.Textbox(label="Creative Commons license?")
|
42 |
+
# )
|
43 |
+
|
44 |
+
# if __name__ == "__main__":
|
45 |
+
# interface.launch()
|
46 |
+
|
47 |
+
|
48 |
+
# import gradio as gr
|
49 |
+
# import asyncio
|
50 |
+
# import os
|
51 |
+
# from openai import AsyncOpenAI
|
52 |
+
|
53 |
+
# # ไปๆจ็ๆจกๅไธญๅฏผๅ
ฅๅฟ
่ฆ็ๅฝๆฐ
|
54 |
+
# from utils.generate_distractors import prepare_q_inputs, construct_prompt_textonly, generate_distractors
|
55 |
+
# from utils.api_utils import generate_from_openai_chat_completion
|
56 |
+
# # ไฟฎๆนgenerate_distractorsๅฝๆฐ๏ผไฝฟๅ
ถๆไธบๅผๆญฅๅฝๆฐ
|
57 |
+
# # ๅ่ฎพgenerate_distractorsๅฝๆฐๅฎไนๅจๆจ็ๆจกๅไธญ๏ผๆไปฌ้่ฆไฟฎๆนๅฎ
|
58 |
+
# # ๅฆๆๆ ๆณไฟฎๆนๅๅงๆจกๅ๏ผ่ฏทๅจๆญคๅค้ๆฐๅฎไน
|
59 |
+
|
60 |
+
# async def generate_distractors_async(model_name: str,
|
61 |
+
# queries: list,
|
62 |
+
# n: int=1,
|
63 |
+
# max_tokens: int=4096):
|
64 |
+
# assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"
|
65 |
+
|
66 |
+
# client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://yanlp.zeabur.app/v1")
|
67 |
+
# messages = prepare_q_inputs(queries)
|
68 |
+
|
69 |
+
# # ็ดๆฅ็ญๅพ
ๅ็จ่ไธๆฏไฝฟ็จasyncio.run()
|
70 |
+
# responses = await generate_from_openai_chat_completion(
|
71 |
+
# client,
|
72 |
+
# messages=messages,
|
73 |
+
# engine_name=model_name,
|
74 |
+
# n=n,
|
75 |
+
# max_tokens=max_tokens,
|
76 |
+
# requests_per_minute=30,
|
77 |
+
# json_format=True
|
78 |
+
# )
|
79 |
+
|
80 |
+
# for query, response in zip(queries, responses):
|
81 |
+
# new_options = response
|
82 |
+
# if new_options and "distractors" in new_options:
|
83 |
+
# query["option_5"] = new_options["distractors"].get("E", "")
|
84 |
+
# query["option_6"] = new_options["distractors"].get("F", "")
|
85 |
+
# query["option_7"] = new_options["distractors"].get("G", "")
|
86 |
+
# query["distractor_analysis"] = new_options["distractors"].get("analysis_of_distractors", "")
|
87 |
+
# else:
|
88 |
+
# query["option_5"] = ""
|
89 |
+
# query["option_6"] = ""
|
90 |
+
# query["option_7"] = ""
|
91 |
+
# query["distractor_analysis"] = ""
|
92 |
+
|
93 |
+
# return queries
|
94 |
+
|
95 |
+
# # ๅฎไนๅผๆญฅๅค็ๅฝๆฐ
|
96 |
+
# async def generate_distractors_gradio(question, option1, option2, option3, option4, answer, answer_analysis):
|
97 |
+
# query = {
|
98 |
+
# 'question': question,
|
99 |
+
# 'option_1': option1,
|
100 |
+
# 'option_2': option2,
|
101 |
+
# 'option_3': option3,
|
102 |
+
# 'option_4': option4,
|
103 |
+
# 'answer': answer,
|
104 |
+
# 'answer_analysis': answer_analysis
|
105 |
+
# }
|
106 |
+
|
107 |
+
# queries = [query] # ๅ ไธบๅฝๆฐๆๆ็ๆฏไธไธชๅ่กจ
|
108 |
+
|
109 |
+
# # ่ฐ็จๅผๆญฅ็ๆๅนฒๆฐ้กน็ๅฝๆฐ
|
110 |
+
# results = await generate_distractors_async(
|
111 |
+
# model_name="gpt-4o-mini",
|
112 |
+
# queries=queries,
|
113 |
+
# n=1,
|
114 |
+
# max_tokens=4096
|
115 |
+
# )
|
116 |
+
|
117 |
+
# # ๆๅ็ปๆ
|
118 |
+
# result = results[0]
|
119 |
+
# new_options = {
|
120 |
+
# 'E': result.get('option_5', ''),
|
121 |
+
# 'F': result.get('option_6', ''),
|
122 |
+
# 'G': result.get('option_7', '')
|
123 |
+
# }
|
124 |
+
# distractor_analysis = result.get('distractor_analysis', '')
|
125 |
+
|
126 |
+
# # ่ฟๅๆฐ็ๅนฒๆฐ้กนๅๅๆ
|
127 |
+
# return new_options, distractor_analysis
|
128 |
+
|
129 |
+
# # ๅๅปบGradio็้ข
|
130 |
+
# with gr.Blocks() as demo:
|
131 |
+
# gr.Markdown("# ๅค้กน้ๆฉ้ขๅนฒๆฐ้กน็ๆๅจ")
|
132 |
+
# with gr.Row():
|
133 |
+
# question_input = gr.Textbox(label="้ฎ้ข", lines=2)
|
134 |
+
# with gr.Row():
|
135 |
+
# option1_input = gr.Textbox(label="้้กนA")
|
136 |
+
# option2_input = gr.Textbox(label="้้กนB")
|
137 |
+
# with gr.Row():
|
138 |
+
# option3_input = gr.Textbox(label="้้กนC")
|
139 |
+
# option4_input = gr.Textbox(label="้้กนD")
|
140 |
+
# with gr.Row():
|
141 |
+
# answer_input = gr.Textbox(label="ๆญฃ็กฎ็ญๆก")
|
142 |
+
# with gr.Row():
|
143 |
+
# answer_analysis_input = gr.Textbox(label="็ญๆก่งฃๆ", lines=3)
|
144 |
+
# with gr.Row():
|
145 |
+
# generate_button = gr.Button("็ๆๅนฒๆฐ้กน")
|
146 |
+
# with gr.Row():
|
147 |
+
# output_options = gr.JSON(label="็ๆ็ๅนฒๆฐ้้กน")
|
148 |
+
# with gr.Row():
|
149 |
+
# output_analysis = gr.Textbox(label="ๅนฒๆฐ้กน่งฃๆ", lines=5)
|
150 |
+
|
151 |
+
# # ๅฎไนๆ้ฎ็นๅปไบไปถ๏ผๆณจๆ่ฟ้ไธ้่ฆไฟฎๆน๏ผGradioไผ่ชๅจๅค็ๅผๆญฅๅฝๆฐ
|
152 |
+
# generate_button.click(
|
153 |
+
# fn=generate_distractors_gradio,
|
154 |
+
# inputs=[question_input, option1_input, option2_input, option3_input, option4_input, answer_input, answer_analysis_input],
|
155 |
+
# outputs=[output_options, output_analysis]
|
156 |
+
# )
|
157 |
+
|
158 |
+
# # ่ฟ่กGradioๅบ็จ
|
159 |
+
# demo.launch()
|
160 |
+
|
161 |
+
|
162 |
+
|
163 |
+
|
164 |
import gradio as gr
|
165 |
+
import asyncio
|
166 |
+
import os
|
167 |
+
import json
|
168 |
import urllib.request
|
169 |
+
from openai import AsyncOpenAI
|
170 |
+
|
171 |
+
# Feature 1: check whether a YouTube video has a Creative Commons license
|
172 |
|
173 |
+
# Make sure your YouTube Data API key is set in an environment variable
|
174 |
+
# Read the YouTube Data API key from the environment instead of committing it
# to the repository. A key that has been published in source control should be
# treated as compromised and rotated. "api_key" is the variable name used by an
# earlier revision of this file and is kept as a fallback.
API_KEY = os.getenv("YOUTUBE_API_KEY") or os.getenv("api_key", "")
|
175 |
|
176 |
def get_youtube_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Args:
        youtube_url: A ``youtube.com`` URL (``watch?v=``, ``/shorts/``,
            ``/embed/``, ``/live/`` or ``/v/`` form) or a ``youtu.be`` short
            link. The scheme may be omitted.

    Returns:
        The video ID, or an empty string when the URL is empty, is not a
        YouTube URL, or carries no video ID.
    """
    from urllib.parse import parse_qs, urlparse

    if not youtube_url:
        return ''

    url = youtube_url.strip()
    if '://' not in url:
        # urlparse only fills in the host when a scheme is present.
        url = 'https://' + url

    try:
        parts = urlparse(url)
    except ValueError:
        return ''

    host = (parts.hostname or '').lower()
    segments = [segment for segment in parts.path.split('/') if segment]

    if host == 'youtu.be':
        return segments[0] if segments else ''

    # Compare the real host instead of searching the whole string, so that
    # "notyoutube.com" or a YouTube URL embedded in another site's query
    # string is not accepted.
    if host == 'youtube.com' or host.endswith('.youtube.com'):
        ids = parse_qs(parts.query).get('v')
        if ids:
            return ids[0]
        if len(segments) >= 2 and segments[0] in ('shorts', 'embed', 'live', 'v'):
            return segments[1]

    return ''
|
185 |
|
186 |
def check_cc_license(youtube_url):
    """Report whether a YouTube video is published under a Creative Commons license.

    Args:
        youtube_url: URL of the video to look up.

    Returns:
        "Yes." when the API reports the ``creativeCommon`` license, "No." for
        any other license value, "Invalid YouTube URL." when no video ID can
        be extracted, "Video not found." when the API returns no items, and
        "An error occurred: ..." when the request or response handling fails.
    """
    from urllib.parse import urlencode

    # Extract the video ID from the URL.
    video_id = get_youtube_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL."

    # Percent-encode the parameters so that a stray '&', '#' or space in the
    # extracted ID cannot change the query that is sent.
    query = urlencode({'id': video_id, 'part': 'status', 'key': API_KEY})
    api_url = f'https://www.googleapis.com/youtube/v3/videos?{query}'

    try:
        # The context manager closes the connection; the timeout keeps a
        # stalled request from blocking the UI callback indefinitely.
        with urllib.request.urlopen(api_url, timeout=10) as response:
            data = json.load(response)

        if 'items' in data and len(data['items']) > 0:
            item = data['items'][0]
            if item['status']['license'] == 'creativeCommon':
                return "Yes."
            return "No."
        return "Video not found."

    except Exception as e:
        # UI boundary: any failure is shown as text in the output box.
        return f"An error occurred: {str(e)}"
|
212 |
|
213 |
+
# Feature 2: generate distractors for multiple-choice questions
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
# Import the required functions from your module
|
218 |
+
from utils.generate_distractors import prepare_q_inputs, construct_prompt_textonly, generate_distractors
|
219 |
+
from utils.api_utils import generate_from_openai_chat_completion
|
220 |
+
# Modify the generate_distractors function so that it becomes an async function
|
221 |
+
# Assuming generate_distractors is defined in your module, it needs to be modified
|
222 |
+
# If the original module cannot be modified, redefine the function here
|
223 |
+
|
224 |
+
async def generate_distractors_async(model_name: str,
                                     queries: list,
                                     n: int=1,
                                     max_tokens: int=4096):
    """Generate three extra distractors for each query without blocking the event loop.

    Args:
        model_name: One of the supported OpenAI model names.
        queries: List of question dicts; each is updated in place.
        n: Number of completions requested per query.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The same ``queries`` list, each dict extended with ``option_5``,
        ``option_6``, ``option_7`` and ``distractor_analysis``. The values are
        empty strings when the reply has no usable ``distractors`` mapping.

    Raises:
        AssertionError: If ``model_name`` is not a supported model.
    """
    # Raise explicitly so the check also runs under `python -O`; the
    # exception type is the one the original `assert` produced.
    if model_name not in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"]:
        raise AssertionError("Invalid model name")

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs(queries)

    # Await the coroutine directly instead of using asyncio.run().
    responses = await generate_from_openai_chat_completion(
        client,
        messages=messages,
        engine_name=model_name,
        n=n,
        max_tokens=max_tokens,
        requests_per_minute=30,
        json_format=True
    )

    for query, response in zip(queries, responses):
        # The reply is model-generated JSON, so its shape is not guaranteed:
        # fall back to an empty mapping unless both levels are dicts.
        distractors = response.get("distractors") if isinstance(response, dict) else None
        if not isinstance(distractors, dict):
            distractors = {}
        query["option_5"] = distractors.get("E", "")
        query["option_6"] = distractors.get("F", "")
        query["option_7"] = distractors.get("G", "")
        query["distractor_analysis"] = distractors.get("analysis_of_distractors", "")

    return queries
|
258 |
+
|
259 |
+
# ๅฎไนๅผๆญฅๅค็ๅฝๆฐ
|
260 |
+
async def generate_distractors_gradio(question, option1, option2, option3, option4, answer, answer_analysis):
    """Gradio callback: build one query from the form fields and return its new distractors.

    Returns:
        A tuple ``(new_options, distractor_analysis)`` where ``new_options``
        maps "E", "F" and "G" to the generated distractors.
    """
    payload = {'question': question}
    for slot, text in enumerate((option1, option2, option3, option4), start=1):
        payload[f'option_{slot}'] = text
    payload['answer'] = answer
    payload['answer_analysis'] = answer_analysis

    # The generator works on a list of queries, so wrap the single query.
    enriched = await generate_distractors_async(
        model_name="gpt-4o-mini",
        queries=[payload],
        n=1,
        max_tokens=4096
    )

    first = enriched[0]
    labelled = {
        letter: first.get(f'option_{slot}', '')
        for letter, slot in zip('EFG', (5, 6, 7))
    }
    return labelled, first.get('distractor_analysis', '')
|
292 |
+
|
293 |
+
|
294 |
+
with gr.Blocks() as demo:
|
295 |
+
gr.Markdown("# ๅคๅ่ฝGradioๅบ็จ")
|
296 |
+
|
297 |
+
with gr.Tabs():
|
298 |
+
with gr.TabItem("YouTube Creative Commonsๆฃๆฅๅจ"):
|
299 |
+
gr.Markdown("## ๆฃๆฅYouTube่ง้ขๆฏๅฆๅ
ทๆCreative Commons่ฎธๅฏ่ฏ")
|
300 |
+
youtube_url_input = gr.Textbox(label="YouTube่ง้ขURL")
|
301 |
+
cc_license_output = gr.Textbox(label="ๆฏๅฆไธบCreative Commons่ฎธๅฏ่ฏ๏ผ")
|
302 |
+
check_button = gr.Button("ๆฃๆฅ่ฎธๅฏ่ฏ")
|
303 |
+
check_button.click(
|
304 |
+
fn=check_cc_license,
|
305 |
+
inputs=youtube_url_input,
|
306 |
+
outputs=cc_license_output
|
307 |
+
)
|
308 |
+
with gr.TabItem("ๅค้กน้ๆฉ้ขๅนฒๆฐ้กน็ๆๅจ"):
|
309 |
+
gr.Markdown("## ไธบๅค้กน้ๆฉ้ข็ๆๅนฒๆฐ้กน")
|
310 |
+
with gr.Row():
|
311 |
+
question_input = gr.Textbox(label="้ฎ้ข", lines=2)
|
312 |
+
with gr.Row():
|
313 |
+
option1_input = gr.Textbox(label="้้กนA")
|
314 |
+
option2_input = gr.Textbox(label="้้กนB")
|
315 |
+
with gr.Row():
|
316 |
+
option3_input = gr.Textbox(label="้้กนC")
|
317 |
+
option4_input = gr.Textbox(label="้้กนD")
|
318 |
+
with gr.Row():
|
319 |
+
answer_input = gr.Textbox(label="ๆญฃ็กฎ็ญๆก")
|
320 |
+
with gr.Row():
|
321 |
+
answer_analysis_input = gr.Textbox(label="็ญๆก่งฃๆ", lines=3)
|
322 |
+
generate_button = gr.Button("็ๆๅนฒๆฐ้กน")
|
323 |
+
output_options = gr.JSON(label="็ๆ็ๅนฒๆฐ้้กน")
|
324 |
+
output_analysis = gr.Textbox(label="ๅนฒๆฐ้กน่งฃๆ", lines=5)
|
325 |
+
generate_button.click(
|
326 |
+
fn=generate_distractors_gradio,
|
327 |
+
inputs=[question_input, option1_input, option2_input, option3_input, option4_input, answer_input, answer_analysis_input],
|
328 |
+
outputs=[output_options, output_analysis]
|
329 |
+
)
|
330 |
+
|
331 |
+
# ่ฟ่กGradioๅบ็จ
|
332 |
+
demo.launch()
|
333 |
|
|
|
|
utils/__pycache__/api_utils.cpython-310.pyc
ADDED
Binary file (4.81 kB). View file
|
|
utils/__pycache__/generate_distractors.cpython-310.pyc
ADDED
Binary file (4.4 kB). View file
|
|
utils/__pycache__/generate_translation.cpython-310.pyc
ADDED
Binary file (2.85 kB). View file
|
|
utils/__pycache__/prompt.cpython-310.pyc
ADDED
Binary file (3.59 kB). View file
|
|
utils/__pycache__/prompt.cpython-311.pyc
ADDED
Binary file (5.88 kB). View file
|
|
utils/__pycache__/prompt.cpython-38.pyc
ADDED
Binary file (3.59 kB). View file
|
|
utils/__pycache__/turkle.cpython-310.pyc
ADDED
Binary file (3.3 kB). View file
|
|
utils/api_utils.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import numpy as np
|
3 |
+
from typing import Dict
|
4 |
+
import random
|
5 |
+
|
6 |
+
import asyncio
|
7 |
+
import logging
|
8 |
+
import os, json
|
9 |
+
from typing import Any
|
10 |
+
from aiohttp import ClientSession
|
11 |
+
from tqdm.asyncio import tqdm_asyncio
|
12 |
+
import random
|
13 |
+
from time import sleep
|
14 |
+
|
15 |
+
import aiolimiter
|
16 |
+
|
17 |
+
import openai
|
18 |
+
from openai import AsyncOpenAI, OpenAIError
|
19 |
+
from anthropic import AsyncAnthropic
|
20 |
+
|
21 |
+
async def _throttled_openai_chat_completion_acreate(
    client: AsyncOpenAI,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
    json_format: bool = False,
    n: int = 1,
):
    """Send one chat-completion request under the rate limiter, retrying on failure.

    Up to 10 attempts are made. Rate-limit errors and other ``OpenAIError``s
    are retried after a randomized pause; a ``BadRequestError`` is not retried.

    Args:
        client: Async OpenAI client used to send the request.
        model: Model name passed to the API.
        messages: Chat messages for a single conversation.
        temperature: Sampling temperature.
        max_tokens: Completion token limit.
        top_p: Nucleus-sampling parameter.
        limiter: Limiter acquired for the duration of the call.
        json_format: When true, request a ``json_object`` response format.
        n: Number of completions to request.

    Returns:
        The API response, or ``None`` on a bad request or once all attempts
        have failed.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "n": n,
    }
    if json_format:
        request_kwargs["response_format"] = {"type": "json_object"}

    async with limiter:
        for _ in range(10):
            try:
                return await client.chat.completions.create(**request_kwargs)
            except openai.RateLimitError:
                print("Rate limit exceeded, retrying...")
                # Longer back-off for rate limits. asyncio.sleep yields to the
                # event loop; time.sleep would freeze every other pending
                # request while this one waits.
                await asyncio.sleep(random.randint(10, 20))
            except openai.BadRequestError as e:
                print(e)
                return None
            except OpenAIError as e:
                print(e)
                await asyncio.sleep(random.randint(5, 10))
        return None
|
64 |
+
|
65 |
+
async def generate_from_openai_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    json_format: bool = False,
    n: int = 1,
):
    """Run a batch of chat-completion requests concurrently under a rate limit.

    Args:
        client: Async OpenAI client.
        messages: Iterable of conversations; one request is sent per item.
        engine_name: Model name passed to the API.
        temperature: Sampling temperature.
        max_tokens: Completion token limit.
        top_p: Nucleus-sampling parameter.
        requests_per_minute: Used to derive the limiter period (60 / value).
        json_format: When true, request JSON output and parse each completion.
        n: Number of completions per request.

    Returns:
        One entry per conversation. With ``n == 1`` each entry is a single
        result; otherwise it is a list of ``n`` results. A result is the
        completion text, or the parsed JSON when ``json_format`` is true.
        Missing, empty or unparseable completions yield a placeholder: an
        empty string, or a dict of empty fields when ``json_format`` is true.
    """
    def _placeholder():
        # Build a fresh object each time so that failed entries never share
        # one mutable dict with each other.
        if not json_format:
            return ""
        return {
            "question": "",
            "options": {
                "A": "",
                "B": "",
                "C": "",
                "D": "",
            },
            "distractors": {
                "E": "",
                "F": "",
                "G": "",
            },
            "correct_answer": ""
        }

    def _result(response, index):
        # A failed request is None, and the API may return fewer choices than
        # requested; both cases fall back to the placeholder.
        try:
            content = response.choices[index].message.content
        except (AttributeError, IndexError, TypeError):
            content = None
        if not content:
            return _placeholder()
        if not json_format:
            return content
        try:
            return json.loads(content)
        except ValueError:
            # Malformed JSON from the model must not abort the whole batch.
            return _placeholder()

    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    async_responses = [
        _throttled_openai_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
            json_format=json_format,
            n=n,
        )
        for message in messages
    ]

    responses = await tqdm_asyncio.gather(*async_responses)

    if n == 1:
        return [_result(response, 0) for response in responses]
    return [[_result(response, i) for i in range(n)] for response in responses]
|
137 |
+
|
138 |
+
async def _throttled_claude_chat_completion_acreate(
    client: AsyncAnthropic,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
):
    """Send one Claude messages request under the rate limiter.

    Args:
        client: Async Anthropic client used to send the request.
        model: Model name passed to the API.
        messages: Messages for a single conversation.
        temperature: Sampling temperature.
        max_tokens: Completion token limit.
        top_p: Nucleus-sampling parameter.
        limiter: Limiter acquired for the duration of the call.

    Returns:
        The API response, or ``None`` if the request raised an exception.
    """
    async with limiter:
        try:
            return await client.messages.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
        except Exception:
            # Best-effort: a failed request becomes None. Catching Exception
            # rather than using a bare `except` lets task cancellation and
            # KeyboardInterrupt propagate.
            return None
|
158 |
+
|
159 |
+
async def generate_from_claude_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    n: int = 1,
):
    """Request ``n`` Claude completions for every conversation in ``messages``.

    Each conversation is submitted ``n`` times as separate requests, all
    sharing one ``AsyncLimiter(1, 60 / requests_per_minute)``.

    Returns:
        With ``n == 1``, a list with one text per conversation; otherwise a
        list of lists holding ``n`` texts per conversation. A request that
        produced no text contributes an empty string.
    """
    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    limiter = aiolimiter.AsyncLimiter(1, 60.0 / requests_per_minute)

    # Repeat every conversation n times, keeping the copies adjacent.
    repeated = [conversation for conversation in messages for _ in range(n)]

    pending = [
        _throttled_claude_chat_completion_acreate(
            client,
            model=engine_name,
            messages=conversation,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
        )
        for conversation in repeated
    ]
    replies = await tqdm_asyncio.gather(*pending)

    texts = []
    for reply in replies:
        has_text = reply and reply.content and reply.content[0] and reply.content[0].text
        texts.append(reply.content[0].text if has_text else "")

    if n == 1:
        return texts

    # Regroup the flat reply list into one sub-list of n texts per conversation.
    grouped = []
    for position, text in enumerate(texts):
        if position % n == 0:
            grouped.append([])
        grouped[-1].append(text)
    return grouped
|
utils/generate_distractors.py
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import re
|
3 |
+
from tqdm import tqdm
|
4 |
+
import os
|
5 |
+
import asyncio
|
6 |
+
from openai import AsyncOpenAI
|
7 |
+
|
8 |
+
from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
|
9 |
+
|
10 |
+
|
11 |
+
def construct_prompt_textonly(question: str, options: list, answer: str, answer_analysis: str) -> str:
    """Build the prompt that asks the model for three additional distractors.

    Args:
        question: The question text.
        options: The existing answer options, lettered from "A" in order.
        answer: The correct answer.
        answer_analysis: Explanation of the correct answer.

    Returns:
        The prompt: instructions, an example of the JSON structure the model
        must return, and the input question, options, answer and analysis.
    """
    letters = [chr(65 + i) for i in range(len(options))]
    optionized_str = "\n".join(f"{letter}. {option}" for letter, option in zip(letters, options))

    # Serialize the example with json.dumps rather than hand-writing it. The
    # hand-written template ended two objects with a trailing comma while
    # telling the model to avoid extra commas, and a quote or backslash in the
    # question or options made the example invalid JSON.
    example = json.dumps(
        {
            "question": question,
            "options": dict(zip(letters, options)),
            "distractors": {
                "E": "New distractor 1",
                "F": "New distractor 2",
                "G": "New distractor 3",
                "analysis_of_distractors": "Use Chinese to explain why the distractors are **incorrect** but **challenging and hard to distinguish**, based on the question, options, and answer analysis.",
            },
            "correct_answer": answer,
        },
        ensure_ascii=False,
        indent=4,
    )

    prompt = f"""
Generate a multiple-choice question with additional distractors that increase the complexity of answer selection. Follow these instructions:
1. **Retain Original Structure**: Retain the original question and options.
2. **Add Three Distractors**: Add three new distractors that are **plausible and maintain professional validity**. These should increase the difficulty but still be incorrect, based on the original question and answer analysis.
3. **Use Answer Analysis**: Reference the **correct answer analysis** when creating distractors to ensure they challenge **subject-matter experts**.
4. **Expert-Level Difficulty**: Keep the distractors **challenging and hard to distinguish** from the correct answer, requiring **advanced knowledge** to avoid the correct answer being too obvious.
5. **Balanced Length**: Ensure all options have **similar lengths** to prevent any one option from standing out.
6. **Distractors Analysis**: Provide a **distractor analysis in Chinese**, explaining why the distractors are **incorrect** but **challenging and hard to distinguish**.

Please output the result in valid JSON format using the structure below. Make sure there are no extra commas, missing commas, extra quotation marks or missing quotation marks:
{example}

Input:
Question: {question}
Options:
{optionized_str}
Answer: {answer}
Answer Analysis: {answer_analysis}
"""

    return prompt
|
52 |
+
|
53 |
+
|
54 |
+
def prepare_q_text_input(query, prompt_func=construct_prompt_textonly):
    """Render the prompt text for one query dict.

    Reads ``question``, ``option_1`` to ``option_4``, ``answer`` and
    ``answer_analysis`` from ``query`` and passes them to ``prompt_func``.
    """
    option_keys = ('option_1', 'option_2', 'option_3', 'option_4')
    return prompt_func(
        question=query['question'],
        options=[query[key] for key in option_keys],
        answer=query['answer'],
        answer_analysis=query['answer_analysis'],
    )
|
62 |
+
|
63 |
+
|
64 |
+
def prepare_q_inputs(queries):
    """Turn each query into a single-message chat conversation.

    Returns:
        A list with one conversation per query; each conversation is a list
        containing one ``user`` message whose content is the rendered prompt.
    """
    return [
        [
            {
                "role": "user",
                "content": prepare_q_text_input(entry),
            },
        ]
        for entry in queries
    ]
|
78 |
+
|
79 |
+
|
80 |
+
|
81 |
+
# def extract_json_from_text(text):
|
82 |
+
# text = json.dumps(text)
|
83 |
+
# # ็งป้ค่ฝฌไน็ฌฆๅๆข่ก็ฌฆ
|
84 |
+
# text = text.replace('\\n', '').replace('\\"', '"')
|
85 |
+
|
86 |
+
# # ๅฎไนๅน้
JSON ๅฏน่ฑก็ๆญฃๅ่กจ่พพๅผๆจกๅผ
|
87 |
+
# json_pattern = re.compile(
|
88 |
+
# r'\{\s*"question":\s*"([^"]*)",\s*"options":\s*\{\s*"A":\s*"([^"]*)",\s*"B":\s*"([^"]*)",\s*"C":\s*"([^"]*)",\s*"D":\s*"([^"]*)"\s*\},'
|
89 |
+
# r'\s*"distractors":\s*\{\s*"E":\s*"([^"]*)",\s*"F":\s*"([^"]*)",\s*"G":\s*"([^"]*)"\s*\},\s*"correct_answer":\s*"([^"]*)"\s*\}',
|
90 |
+
# re.DOTALL
|
91 |
+
# )
|
92 |
+
|
93 |
+
# # ๅน้
JSON ็ปๆ
|
94 |
+
# match = json_pattern.search(text)
|
95 |
+
|
96 |
+
# if match:
|
97 |
+
# # ๆ่ทๅฐ็ๅน้
็ป
|
98 |
+
# question = match.group(1)
|
99 |
+
# option_a = match.group(2)
|
100 |
+
# option_b = match.group(3)
|
101 |
+
# option_c = match.group(4)
|
102 |
+
# option_d = match.group(5)
|
103 |
+
# distractor_e = match.group(6)
|
104 |
+
# distractor_f = match.group(7)
|
105 |
+
# distractor_g = match.group(8)
|
106 |
+
# correct_answer = match.group(9)
|
107 |
+
|
108 |
+
# # ๆๅปบ JSON ๅฏน่ฑก
|
109 |
+
# json_data = {
|
110 |
+
# "question": question,
|
111 |
+
# "options": {
|
112 |
+
# "A": option_a,
|
113 |
+
# "B": option_b,
|
114 |
+
# "C": option_c,
|
115 |
+
# "D": option_d
|
116 |
+
# },
|
117 |
+
# "distractors": {
|
118 |
+
# "E": distractor_e,
|
119 |
+
# "F": distractor_f,
|
120 |
+
# "G": distractor_g
|
121 |
+
# },
|
122 |
+
# "correct_answer": correct_answer
|
123 |
+
# }
|
124 |
+
|
125 |
+
# return json_data
|
126 |
+
# else:
|
127 |
+
# print("No JSON object found in the text.")
|
128 |
+
# return None
|
129 |
+
|
130 |
+
|
131 |
+
def generate_distractors(model_name: str,
                         queries: list,
                         n: int=1,
                         max_tokens: int=4096):
    """Generate three extra distractors for each query (synchronous entry point).

    The requests are run with ``asyncio.run``.

    Args:
        model_name: One of the supported OpenAI model names.
        queries: List of question dicts; each is updated in place.
        n: Number of completions requested per query.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The same ``queries`` list, each dict extended with ``option_5``,
        ``option_6``, ``option_7`` and ``distractor_analysis``. The values are
        empty strings when the reply has no usable ``distractors`` mapping.

    Raises:
        AssertionError: If ``model_name`` is not a supported model.
    """
    # Raise explicitly so the check also runs under `python -O`; the
    # exception type is the one the original `assert` produced.
    if model_name not in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"]:
        raise AssertionError("Invalid model name")

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs(queries)

    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n = n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )

    for query, response in zip(queries, responses):
        # The reply is model-generated JSON, so its shape is not guaranteed:
        # fall back to an empty mapping unless both levels are dicts.
        distractors = response.get("distractors") if isinstance(response, dict) else None
        if not isinstance(distractors, dict):
            distractors = {}
        query["option_5"] = distractors.get("E", "")
        query["option_6"] = distractors.get("F", "")
        query["option_7"] = distractors.get("G", "")
        query["distractor_analysis"] = distractors.get("analysis_of_distractors", "")

    return queries
|
174 |
+
|
175 |
+
|
176 |
+
|
177 |
+
|
178 |
+
|
179 |
+
|
utils/generate_translation.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import re
|
3 |
+
from tqdm import tqdm
|
4 |
+
import os
|
5 |
+
import asyncio
|
6 |
+
from openai import AsyncOpenAI
|
7 |
+
|
8 |
+
from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
|
9 |
+
|
10 |
+
|
11 |
+
def construct_translate_prompt_textonly(question: str, options: list, text_only_analysis: str) -> str:
    """Build the prompt that asks the model to translate a question and its analysis into Chinese.

    Args:
        question: The question text.
        options: Answer options, lettered from "A" in order.
        text_only_analysis: The analysis text to translate.

    Returns:
        The prompt: instructions, an example of the JSON structure to return
        (keys ``QA`` and ``ToA``), and the input text.
    """
    optionized_list = [f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
    QA_str = question + "\n" + "\n".join(optionized_list)

    # The example must itself be valid JSON: the earlier text lacked the
    # colon after "ToA" and ended with a trailing comma.
    prompt = f"""
Please translate the following inputs into Chinese, ensuring they maintain a professional tone. If the input is empty, return an empty string.

Output the result in valid JSON format using the structure provided below. Be careful to avoid extra commas or missing quotation marks:
{{
    "QA": "The translation of QA str",
    "ToA": "The translation of text_only_analysis."
}}

Input:
QA: {QA_str}
text_only_analysis: {text_only_analysis}
"""

    return prompt
|
31 |
+
|
32 |
+
|
33 |
+
def prepare_q_text_input_translation(query, prompt_func=construct_translate_prompt_textonly):
    """Render the translation prompt for one query dict.

    Reads ``question``, ``option_1`` to ``option_7`` and
    ``text_only_example_response`` from ``query`` and passes them to
    ``prompt_func``.
    """
    return prompt_func(
        question=query['question'],
        options=[query[f'option_{slot}'] for slot in range(1, 8)],
        text_only_analysis=query['text_only_example_response'],
    )
|
40 |
+
|
41 |
+
|
42 |
+
def prepare_q_inputs_translation(queries):
    """Turn each query into a single-message chat conversation for translation.

    Returns:
        A list with one conversation per query; each conversation is a list
        containing one ``user`` message whose content is the rendered prompt.
    """
    return [
        [
            {
                "role": "user",
                "content": prepare_q_text_input_translation(entry),
            },
        ]
        for entry in queries
    ]
|
56 |
+
|
57 |
+
|
58 |
+
def generate_translation(model_name: str,
                         queries: list,
                         n: int=1,
                         max_tokens: int=2048):
    """Translate each query's question, options and analysis (synchronous entry point).

    The requests are run with ``asyncio.run``.

    Args:
        model_name: One of the supported OpenAI model names.
        queries: List of question dicts; each is updated in place.
        n: Number of completions requested per query.
        max_tokens: Completion token limit passed to the API.

    Returns:
        The same ``queries`` list, each dict extended with ``QA_translation``
        and ``text_only_example_response_translation``. The values are empty
        strings when the reply is not a dict or lacks the key.

    Raises:
        AssertionError: If ``model_name`` is not a supported model.
    """
    # Raise explicitly so the check also runs under `python -O`; the
    # exception type is the one the original `assert` produced.
    if model_name not in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"]:
        raise AssertionError("Invalid model name")

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs_translation(queries)

    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n = n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )

    for query, response in zip(queries, responses):
        # The reply is model-generated JSON and may not be an object; treat
        # anything other than a dict as "no translation".
        translated = response if isinstance(response, dict) else {}
        query["QA_translation"] = translated.get("QA", "")
        query["text_only_example_response_translation"] = translated.get("ToA", "")

    return queries
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|