่ฐข็’็’Ÿ commited on
Commit
e9ce3e8
โ€ข
1 Parent(s): c1d41a3
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: CC
3
  emoji: ๐Ÿ“‰
4
  colorFrom: indigo
5
  colorTo: indigo
 
1
  ---
2
+ title: CC_and_Newoptions
3
  emoji: ๐Ÿ“‰
4
  colorFrom: indigo
5
  colorTo: indigo
app.py CHANGED
@@ -1,45 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- import json, os
 
 
3
  import urllib.request
 
 
 
4
 
5
- # Replace 'YOUR_API_KEY' with your actual YouTube Data API key
6
- API_KEY = os.getenv('api_key')
7
 
8
  def get_youtube_id(youtube_url):
9
  if 'youtube.com' in youtube_url:
10
  video_id = youtube_url.split('v=')[-1]
 
11
  elif 'youtu.be' in youtube_url:
12
  video_id = youtube_url.split('/')[-1].split('?')[0]
 
 
13
  return video_id
14
 
15
  def check_cc_license(youtube_url):
16
- # Extract video ID from the URL
17
  video_id = get_youtube_id(youtube_url)
 
 
18
 
19
- # YouTube Data API URL to get video details
20
  api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'
21
 
22
  try:
23
- # Fetch video details
24
  response = urllib.request.urlopen(api_url)
25
  data = json.load(response)
26
 
27
- # Check the license status
28
- for item in data['items']:
 
29
  if item['status']['license'] == 'creativeCommon':
30
- return f"Yes."
31
  else:
32
- return f"No."
 
 
33
 
34
  except Exception as e:
35
  return f"An error occurred: {str(e)}"
36
 
37
- # Gradio interface
38
- interface = gr.Interface(
39
- fn=check_cc_license,
40
- inputs=gr.Textbox(label="YouTube Video URL"),
41
- outputs=gr.Textbox(label="Creative Commons license?")
42
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- if __name__ == "__main__":
45
- interface.launch()
 
1
+ # import gradio as gr
2
+ # import json, os
3
+ # import urllib.request
4
+
5
+ # # Replace 'YOUR_API_KEY' with your actual YouTube Data API key
6
+ # API_KEY = os.getenv('api_key')
7
+
8
+ # def get_youtube_id(youtube_url):
9
+ # if 'youtube.com' in youtube_url:
10
+ # video_id = youtube_url.split('v=')[-1]
11
+ # elif 'youtu.be' in youtube_url:
12
+ # video_id = youtube_url.split('/')[-1].split('?')[0]
13
+ # return video_id
14
+
15
+ # def check_cc_license(youtube_url):
16
+ # # Extract video ID from the URL
17
+ # video_id = get_youtube_id(youtube_url)
18
+
19
+ # # YouTube Data API URL to get video details
20
+ # api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'
21
+
22
+ # try:
23
+ # # Fetch video details
24
+ # response = urllib.request.urlopen(api_url)
25
+ # data = json.load(response)
26
+
27
+ # # Check the license status
28
+ # for item in data['items']:
29
+ # if item['status']['license'] == 'creativeCommon':
30
+ # return f"Yes."
31
+ # else:
32
+ # return f"No."
33
+
34
+ # except Exception as e:
35
+ # return f"An error occurred: {str(e)}"
36
+
37
+ # # Gradio interface
38
+ # interface = gr.Interface(
39
+ # fn=check_cc_license,
40
+ # inputs=gr.Textbox(label="YouTube Video URL"),
41
+ # outputs=gr.Textbox(label="Creative Commons license?")
42
+ # )
43
+
44
+ # if __name__ == "__main__":
45
+ # interface.launch()
46
+
47
+
48
+ # import gradio as gr
49
+ # import asyncio
50
+ # import os
51
+ # from openai import AsyncOpenAI
52
+
53
+ # # ไปŽๆ‚จ็š„ๆจกๅ—ไธญๅฏผๅ…ฅๅฟ…่ฆ็š„ๅ‡ฝๆ•ฐ
54
+ # from utils.generate_distractors import prepare_q_inputs, construct_prompt_textonly, generate_distractors
55
+ # from utils.api_utils import generate_from_openai_chat_completion
56
+ # # ไฟฎๆ”นgenerate_distractorsๅ‡ฝๆ•ฐ๏ผŒไฝฟๅ…ถๆˆไธบๅผ‚ๆญฅๅ‡ฝๆ•ฐ
57
+ # # ๅ‡่ฎพgenerate_distractorsๅ‡ฝๆ•ฐๅฎšไน‰ๅœจๆ‚จ็š„ๆจกๅ—ไธญ๏ผŒๆˆ‘ไปฌ้œ€่ฆไฟฎๆ”นๅฎƒ
58
+ # # ๅฆ‚ๆžœๆ— ๆณ•ไฟฎๆ”นๅŽŸๅง‹ๆจกๅ—๏ผŒ่ฏทๅœจๆญคๅค„้‡ๆ–ฐๅฎšไน‰
59
+
60
+ # async def generate_distractors_async(model_name: str,
61
+ # queries: list,
62
+ # n: int=1,
63
+ # max_tokens: int=4096):
64
+ # assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"
65
+
66
+ # client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url="https://yanlp.zeabur.app/v1")
67
+ # messages = prepare_q_inputs(queries)
68
+
69
+ # # ็›ดๆŽฅ็ญ‰ๅพ…ๅ็จ‹่€Œไธๆ˜ฏไฝฟ็”จasyncio.run()
70
+ # responses = await generate_from_openai_chat_completion(
71
+ # client,
72
+ # messages=messages,
73
+ # engine_name=model_name,
74
+ # n=n,
75
+ # max_tokens=max_tokens,
76
+ # requests_per_minute=30,
77
+ # json_format=True
78
+ # )
79
+
80
+ # for query, response in zip(queries, responses):
81
+ # new_options = response
82
+ # if new_options and "distractors" in new_options:
83
+ # query["option_5"] = new_options["distractors"].get("E", "")
84
+ # query["option_6"] = new_options["distractors"].get("F", "")
85
+ # query["option_7"] = new_options["distractors"].get("G", "")
86
+ # query["distractor_analysis"] = new_options["distractors"].get("analysis_of_distractors", "")
87
+ # else:
88
+ # query["option_5"] = ""
89
+ # query["option_6"] = ""
90
+ # query["option_7"] = ""
91
+ # query["distractor_analysis"] = ""
92
+
93
+ # return queries
94
+
95
+ # # ๅฎšไน‰ๅผ‚ๆญฅๅค„็†ๅ‡ฝๆ•ฐ
96
+ # async def generate_distractors_gradio(question, option1, option2, option3, option4, answer, answer_analysis):
97
+ # query = {
98
+ # 'question': question,
99
+ # 'option_1': option1,
100
+ # 'option_2': option2,
101
+ # 'option_3': option3,
102
+ # 'option_4': option4,
103
+ # 'answer': answer,
104
+ # 'answer_analysis': answer_analysis
105
+ # }
106
+
107
+ # queries = [query] # ๅ› ไธบๅ‡ฝๆ•ฐๆœŸๆœ›็š„ๆ˜ฏไธ€ไธชๅˆ—่กจ
108
+
109
+ # # ่ฐƒ็”จๅผ‚ๆญฅ็”Ÿๆˆๅนฒๆ‰ฐ้กน็š„ๅ‡ฝๆ•ฐ
110
+ # results = await generate_distractors_async(
111
+ # model_name="gpt-4o-mini",
112
+ # queries=queries,
113
+ # n=1,
114
+ # max_tokens=4096
115
+ # )
116
+
117
+ # # ๆๅ–็ป“ๆžœ
118
+ # result = results[0]
119
+ # new_options = {
120
+ # 'E': result.get('option_5', ''),
121
+ # 'F': result.get('option_6', ''),
122
+ # 'G': result.get('option_7', '')
123
+ # }
124
+ # distractor_analysis = result.get('distractor_analysis', '')
125
+
126
+ # # ่ฟ”ๅ›žๆ–ฐ็š„ๅนฒๆ‰ฐ้กนๅ’Œๅˆ†ๆž
127
+ # return new_options, distractor_analysis
128
+
129
+ # # ๅˆ›ๅปบGradio็•Œ้ข
130
+ # with gr.Blocks() as demo:
131
+ # gr.Markdown("# ๅคš้กน้€‰ๆ‹ฉ้ข˜ๅนฒๆ‰ฐ้กน็”Ÿๆˆๅ™จ")
132
+ # with gr.Row():
133
+ # question_input = gr.Textbox(label="้—ฎ้ข˜", lines=2)
134
+ # with gr.Row():
135
+ # option1_input = gr.Textbox(label="้€‰้กนA")
136
+ # option2_input = gr.Textbox(label="้€‰้กนB")
137
+ # with gr.Row():
138
+ # option3_input = gr.Textbox(label="้€‰้กนC")
139
+ # option4_input = gr.Textbox(label="้€‰้กนD")
140
+ # with gr.Row():
141
+ # answer_input = gr.Textbox(label="ๆญฃ็กฎ็ญ”ๆกˆ")
142
+ # with gr.Row():
143
+ # answer_analysis_input = gr.Textbox(label="็ญ”ๆกˆ่งฃๆž", lines=3)
144
+ # with gr.Row():
145
+ # generate_button = gr.Button("็”Ÿๆˆๅนฒๆ‰ฐ้กน")
146
+ # with gr.Row():
147
+ # output_options = gr.JSON(label="็”Ÿๆˆ็š„ๅนฒๆ‰ฐ้€‰้กน")
148
+ # with gr.Row():
149
+ # output_analysis = gr.Textbox(label="ๅนฒๆ‰ฐ้กน่งฃๆž", lines=5)
150
+
151
+ # # ๅฎšไน‰ๆŒ‰้’ฎ็‚นๅ‡ปไบ‹ไปถ๏ผŒๆณจๆ„่ฟ™้‡Œไธ้œ€่ฆไฟฎๆ”น๏ผŒGradioไผš่‡ชๅŠจๅค„็†ๅผ‚ๆญฅๅ‡ฝๆ•ฐ
152
+ # generate_button.click(
153
+ # fn=generate_distractors_gradio,
154
+ # inputs=[question_input, option1_input, option2_input, option3_input, option4_input, answer_input, answer_analysis_input],
155
+ # outputs=[output_options, output_analysis]
156
+ # )
157
+
158
+ # # ่ฟ่กŒGradioๅบ”็”จ
159
+ # demo.launch()
160
+
161
+
162
+
163
+
164
  import gradio as gr
165
+ import asyncio
166
+ import os
167
+ import json
168
  import urllib.request
169
+ from openai import AsyncOpenAI
170
+
171
# Feature 1: check whether a YouTube video carries a Creative Commons license.

# SECURITY: the API key must come from the environment, never from source
# control. A real key was previously committed here — it is exposed in the
# repository history and must be revoked/rotated in the Google Cloud console.
# Set the `api_key` secret/environment variable in the deployment instead.
API_KEY = os.getenv("api_key", "")
175
 
176
def get_youtube_id(youtube_url):
    """Extract the video id from a YouTube URL.

    Handles long-form URLs (youtube.com/watch?v=ID, any parameter order)
    and short-form URLs (youtu.be/ID). Returns '' when the URL is not a
    recognized YouTube URL.
    """
    from urllib.parse import parse_qs, urlparse

    if 'youtube.com' in youtube_url:
        # Parse the query string properly so the id is found regardless of
        # parameter order (naive split('v=') breaks on e.g. ?feature=x&v=ID
        # when other params contain 'v=').
        params = parse_qs(urlparse(youtube_url).query)
        video_id = params.get('v', [''])[0]
        if not video_id:
            # Fall back to the original heuristic for non-standard URLs.
            video_id = youtube_url.split('v=')[-1].split('&')[0]
    elif 'youtu.be' in youtube_url:
        # Short link: the id is the last path segment, before any query.
        video_id = youtube_url.split('/')[-1].split('?')[0]
    else:
        video_id = ''
    return video_id
185
 
186
def check_cc_license(youtube_url):
    """Return "Yes."/"No." depending on whether the video is CC-licensed.

    On bad input, a missing video, or an API failure, returns a
    human-readable message instead (the result feeds a Gradio textbox).
    """
    # Extract the video id from the URL.
    video_id = get_youtube_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL."

    # YouTube Data API v3: only the `status` part is needed for the license.
    api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&part=status&key={API_KEY}'

    try:
        # Use a context manager so the HTTP response is always closed
        # (the original leaked the connection on every call).
        with urllib.request.urlopen(api_url) as response:
            data = json.load(response)

        items = data.get('items', [])
        if not items:
            return "Video not found."
        # 'creativeCommon' is the API's literal value for CC-BY videos.
        if items[0]['status']['license'] == 'creativeCommon':
            return "Yes."
        return "No."

    except Exception as e:
        # Broad catch is intentional: any failure becomes a UI message.
        return f"An error occurred: {str(e)}"
212
 
213
+ # ็ฌฌไบŒไธชๅŠŸ่ƒฝ๏ผšไธบๅคš้กน้€‰ๆ‹ฉ้ข˜็”Ÿๆˆๅนฒๆ‰ฐ้กน
214
+
215
+
216
+
217
+ # ไปŽๆ‚จ็š„ๆจกๅ—ไธญๅฏผๅ…ฅๅฟ…่ฆ็š„ๅ‡ฝๆ•ฐ
218
+ from utils.generate_distractors import prepare_q_inputs, construct_prompt_textonly, generate_distractors
219
+ from utils.api_utils import generate_from_openai_chat_completion
220
+ # ไฟฎๆ”นgenerate_distractorsๅ‡ฝๆ•ฐ๏ผŒไฝฟๅ…ถๆˆไธบๅผ‚ๆญฅๅ‡ฝๆ•ฐ
221
+ # ๅ‡่ฎพgenerate_distractorsๅ‡ฝๆ•ฐๅฎšไน‰ๅœจๆ‚จ็š„ๆจกๅ—ไธญ๏ผŒๆˆ‘ไปฌ้œ€่ฆไฟฎๆ”นๅฎƒ
222
+ # ๅฆ‚ๆžœๆ— ๆณ•ไฟฎๆ”นๅŽŸๅง‹ๆจกๅ—๏ผŒ่ฏทๅœจๆญคๅค„้‡ๆ–ฐๅฎšไน‰
223
+
224
async def generate_distractors_async(model_name: str,
                                     queries: list,
                                     n: int = 1,
                                     max_tokens: int = 4096):
    """Asynchronously add distractor options E/F/G (plus analysis) to each query.

    Mutates the query dicts in place and returns the same list.
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs(queries)

    # Await the coroutine directly rather than asyncio.run(): when called
    # from Gradio we are already inside a running event loop.
    responses = await generate_from_openai_chat_completion(
        client,
        messages=messages,
        engine_name=model_name,
        n=n,
        max_tokens=max_tokens,
        requests_per_minute=30,
        json_format=True
    )

    for query, payload in zip(queries, responses):
        if payload and "distractors" in payload:
            generated = payload["distractors"]
            query["option_5"] = generated.get("E", "")
            query["option_6"] = generated.get("F", "")
            query["option_7"] = generated.get("G", "")
            query["distractor_analysis"] = generated.get("analysis_of_distractors", "")
        else:
            # Failed or malformed response: blank placeholders.
            query["option_5"] = ""
            query["option_6"] = ""
            query["option_7"] = ""
            query["distractor_analysis"] = ""

    return queries
258
+
259
+ # ๅฎšไน‰ๅผ‚ๆญฅๅค„็†ๅ‡ฝๆ•ฐ
260
async def generate_distractors_gradio(question, option1, option2, option3, option4, answer, answer_analysis):
    """Gradio handler: wrap the form fields into a query dict and return
    (new distractor options, distractor analysis)."""
    queries = [{
        'question': question,
        'option_1': option1,
        'option_2': option2,
        'option_3': option3,
        'option_4': option4,
        'answer': answer,
        'answer_analysis': answer_analysis,
    }]  # the generator expects a list of query dicts

    enriched = await generate_distractors_async(
        model_name="gpt-4o-mini",
        queries=queries,
        n=1,
        max_tokens=4096
    )

    # Only one query was submitted; unpack its generated fields.
    first = enriched[0]
    new_options = {
        'E': first.get('option_5', ''),
        'F': first.get('option_6', ''),
        'G': first.get('option_7', ''),
    }
    return new_options, first.get('distractor_analysis', '')
292
+
293
+
294
# Top-level Gradio app: one tab per feature (license checker, distractor
# generator). UI strings are user-facing and kept verbatim.
with gr.Blocks() as demo:
    gr.Markdown("# ๅคšๅŠŸ่ƒฝGradioๅบ”็”จ")

    with gr.Tabs():
        with gr.TabItem("YouTube Creative Commonsๆฃ€ๆŸฅๅ™จ"):
            gr.Markdown("## ๆฃ€ๆŸฅYouTube่ง†้ข‘ๆ˜ฏๅฆๅ…ทๆœ‰Creative Commons่ฎธๅฏ่ฏ")
            youtube_url_input = gr.Textbox(label="YouTube่ง†้ข‘URL")
            cc_license_output = gr.Textbox(label="ๆ˜ฏๅฆไธบCreative Commons่ฎธๅฏ่ฏ๏ผŸ")
            check_button = gr.Button("ๆฃ€ๆŸฅ่ฎธๅฏ่ฏ")
            check_button.click(
                fn=check_cc_license,
                inputs=youtube_url_input,
                outputs=cc_license_output
            )
        with gr.TabItem("ๅคš้กน้€‰ๆ‹ฉ้ข˜ๅนฒๆ‰ฐ้กน็”Ÿๆˆๅ™จ"):
            gr.Markdown("## ไธบๅคš้กน้€‰ๆ‹ฉ้ข˜็”Ÿๆˆๅนฒๆ‰ฐ้กน")
            with gr.Row():
                question_input = gr.Textbox(label="้—ฎ้ข˜", lines=2)
            with gr.Row():
                option1_input = gr.Textbox(label="้€‰้กนA")
                option2_input = gr.Textbox(label="้€‰้กนB")
            with gr.Row():
                option3_input = gr.Textbox(label="้€‰้กนC")
                option4_input = gr.Textbox(label="้€‰้กนD")
            with gr.Row():
                answer_input = gr.Textbox(label="ๆญฃ็กฎ็ญ”ๆกˆ")
            with gr.Row():
                answer_analysis_input = gr.Textbox(label="็ญ”ๆกˆ่งฃๆž", lines=3)
            generate_button = gr.Button("็”Ÿๆˆๅนฒๆ‰ฐ้กน")
            output_options = gr.JSON(label="็”Ÿๆˆ็š„ๅนฒๆ‰ฐ้€‰้กน")
            output_analysis = gr.Textbox(label="ๅนฒๆ‰ฐ้กน่งฃๆž", lines=5)
            # Gradio handles the async handler transparently.
            generate_button.click(
                fn=generate_distractors_gradio,
                inputs=[question_input, option1_input, option2_input, option3_input, option4_input, answer_input, answer_analysis_input],
                outputs=[output_options, output_analysis]
            )

# Launch only when executed as a script, so importing app.py has no side
# effects. HF Spaces runs app.py as __main__, so Spaces behavior is unchanged.
if __name__ == "__main__":
    demo.launch()
333
 
 
 
utils/__pycache__/api_utils.cpython-310.pyc ADDED
Binary file (4.81 kB). View file
 
utils/__pycache__/generate_distractors.cpython-310.pyc ADDED
Binary file (4.4 kB). View file
 
utils/__pycache__/generate_translation.cpython-310.pyc ADDED
Binary file (2.85 kB). View file
 
utils/__pycache__/prompt.cpython-310.pyc ADDED
Binary file (3.59 kB). View file
 
utils/__pycache__/prompt.cpython-311.pyc ADDED
Binary file (5.88 kB). View file
 
utils/__pycache__/prompt.cpython-38.pyc ADDED
Binary file (3.59 kB). View file
 
utils/__pycache__/turkle.cpython-310.pyc ADDED
Binary file (3.3 kB). View file
 
utils/api_utils.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import numpy as np
3
+ from typing import Dict
4
+ import random
5
+
6
+ import asyncio
7
+ import logging
8
+ import os, json
9
+ from typing import Any
10
+ from aiohttp import ClientSession
11
+ from tqdm.asyncio import tqdm_asyncio
12
+ import random
13
+ from time import sleep
14
+
15
+ import aiolimiter
16
+
17
+ import openai
18
+ from openai import AsyncOpenAI, OpenAIError
19
+ from anthropic import AsyncAnthropic
20
+
21
async def _throttled_openai_chat_completion_acreate(
    client: AsyncOpenAI,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
    json_format: bool = False,
    n: int = 1,
):
    """Rate-limited chat-completion call with retry on rate limiting.

    Returns the API response object, or None on a bad request, a
    non-rate-limit OpenAI error, or after exhausting all retries.
    """
    # Build the kwargs once so the JSON / non-JSON paths share a single
    # call site (the original duplicated the entire create() invocation).
    kwargs = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        n=n,
    )
    if json_format:
        kwargs["response_format"] = {"type": "json_object"}

    async with limiter:
        for _ in range(10):
            try:
                return await client.chat.completions.create(**kwargs)
            except openai.RateLimitError:
                print("Rate limit exceeded, retrying...")
                # NOTE(review): time.sleep blocks the event loop; kept for
                # behavioral parity — consider asyncio.sleep in a follow-up.
                sleep(random.randint(10, 20))
            except openai.BadRequestError as e:
                # Malformed request: retrying cannot help.
                print(e)
                return None
            except OpenAIError as e:
                print(e)
                sleep(random.randint(5, 10))
                return None
        return None  # all retries exhausted
+
65
async def generate_from_openai_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    json_format: bool = False,
    n: int = 1,
):
    """Fan out chat-completion requests under a rate limit and collect outputs.

    Returns one entry per input message: a parsed dict (json_format) or a
    string; when n > 1, a list of n such values. Failed requests yield empty
    placeholders instead of raising.
    """
    # One request per `delay` seconds overall.
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    async_responses = [
        _throttled_openai_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
            json_format=json_format,
            n=n,
        )
        for message in messages
    ]

    responses = await tqdm_asyncio.gather(*async_responses)

    def _empty_payload():
        # Fresh dict per failure: the original appended ONE shared dict for
        # every failed request, so a caller mutating one placeholder would
        # silently mutate them all.
        return {
            "question": "",
            "options": {"A": "", "B": "", "C": "", "D": ""},
            "distractors": {"E": "", "F": "", "G": ""},
            "correct_answer": "",
        }

    def _content(resp, i=0):
        # Safely extract choices[i].message.content; None when anything in
        # the chain is missing/empty.
        if resp and resp.choices[i] and resp.choices[i].message and resp.choices[i].message.content:
            return resp.choices[i].message.content
        return None

    outputs = []
    for response in responses:
        if n == 1:
            content = _content(response)
            if json_format:
                outputs.append(json.loads(content) if content else _empty_payload())
            else:
                outputs.append(content if content else "")
        else:
            batch = []
            for i in range(n):
                content = _content(response, i)
                if json_format:
                    batch.append(json.loads(content) if content else _empty_payload())
                else:
                    batch.append(content if content else "")
            outputs.append(batch)
    return outputs
+
138
async def _throttled_claude_chat_completion_acreate(
    client: AsyncAnthropic,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
):
    """Rate-limited single Claude message call; returns None on API error."""
    async with limiter:
        try:
            return await client.messages.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
        except Exception as e:
            # Narrowed from a bare `except:` so asyncio.CancelledError and
            # KeyboardInterrupt propagate instead of being swallowed; log the
            # failure rather than dropping it silently.
            print(e)
            return None
+
159
async def generate_from_claude_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    n: int = 1,
):
    """Fan out Claude requests (n independent samples per message) under a
    rate limit. Returns one string per message, or a list of n strings each
    when n > 1; failures yield "".
    """
    limiter = aiolimiter.AsyncLimiter(1, 60.0 / requests_per_minute)

    # Duplicate every prompt n times so each copy is sampled independently.
    n_messages = [message for message in messages for _ in range(n)]

    tasks = [
        _throttled_claude_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
        )
        for message in n_messages
    ]

    responses = await tqdm_asyncio.gather(*tasks)

    def _text(resp):
        # First content block's text, or "" when the call failed.
        if resp and resp.content and resp.content[0] and resp.content[0].text:
            return resp.content[0].text
        return ""

    if n == 1:
        return [_text(resp) for resp in responses]

    # Re-group the flat response list back into chunks of n per message.
    grouped = []
    for idx, resp in enumerate(responses):
        if idx % n == 0:
            grouped.append([])
        grouped[-1].append(_text(resp))
    return grouped
utils/generate_distractors.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from tqdm import tqdm
4
+ import os
5
+ import asyncio
6
+ from openai import AsyncOpenAI
7
+
8
+ from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
9
+
10
+
11
def construct_prompt_textonly(question: str, options: list, answer: str, answer_analysis: str) -> str:
    """Build the distractor-generation prompt for one multiple-choice question.

    Args:
        question: the question stem.
        options: the four existing options (A-D), in order.
        answer: the correct answer label.
        answer_analysis: explanation of why the answer is correct.

    Returns:
        A prompt whose expected model output is a JSON object containing the
        original options plus three new distractors E/F/G and an analysis.
    """
    optionized_list = [f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
    optionized_str = "\n".join(optionized_list)

    # FIX: the example JSON previously contained trailing commas (after
    # "analysis_of_distractors" and "correct_answer") while simultaneously
    # instructing the model to avoid extra commas — the template itself was
    # invalid JSON. The trailing commas are removed below.
    prompt = f"""
Generate a multiple-choice question with additional distractors that increase the complexity of answer selection. Follow these instructions:
1. **Retain Original Structure**: Retain the original question and options.
2. **Add Three Distractors**: Add three new distractors that are **plausible and maintain professional validity**. These should increase the difficulty but still be incorrect, based on the original question and answer analysis.
3. **Use Answer Analysis**: Reference the **correct answer analysis** when creating distractors to ensure they challenge **subject-matter experts**.
4. **Expert-Level Difficulty**: Keep the distractors **challenging and hard to distinguish** from the correct answer, requiring **advanced knowledge** to avoid the correct answer being too obvious.
5. **Balanced Length**: Ensure all options have **similar lengths** to prevent any one option from standing out.
6. **Distractors Analysis**: Provide a **distractor analysis in Chinese**, explaining why the distractors are **incorrect** but **challenging and hard to distinguish**.

Please output the result in valid JSON format using the structure below. Make sure there are no extra commas, missing commas, extra quotation marks or missing quotation marks:
{{
    "question": "{question}",
    "options": {{
        "A": "{options[0]}",
        "B": "{options[1]}",
        "C": "{options[2]}",
        "D": "{options[3]}"
    }},
    "distractors": {{
        "E": "New distractor 1",
        "F": "New distractor 2",
        "G": "New distractor 3",
        "analysis_of_distractors": "Use Chinese to explain why the distractors are **incorrect** but **challenging and hard to distinguish**, based on the question, options, and answer analysis."
    }},
    "correct_answer": "{answer}"
}}

Input:
Question: {question}
Options:
{optionized_str}
Answer: {answer}
Answer Analysis: {answer_analysis}
"""

    return prompt
+
53
+
54
def prepare_q_text_input(query, prompt_func=construct_prompt_textonly):
    """Build the distractor-generation prompt for a single query dict."""
    # The four original options are stored under option_1..option_4.
    option_values = [query[f'option_{k}'] for k in range(1, 5)]
    return prompt_func(
        question=query['question'],
        options=option_values,
        answer=query['answer'],
        answer_analysis=query['answer_analysis'],
    )
+
63
+
64
def prepare_q_inputs(queries):
    """Wrap each query's prompt into a single-turn chat message list."""
    return [
        [{"role": "user", "content": prepare_q_text_input(query)}]
        for query in queries
    ]
+
79
+
80
+
81
+ # def extract_json_from_text(text):
82
+ # text = json.dumps(text)
83
+ # # ็งป้™ค่ฝฌไน‰็ฌฆๅ’Œๆข่กŒ็ฌฆ
84
+ # text = text.replace('\\n', '').replace('\\"', '"')
85
+
86
+ # # ๅฎšไน‰ๅŒน้… JSON ๅฏน่ฑก็š„ๆญฃๅˆ™่กจ่พพๅผๆจกๅผ
87
+ # json_pattern = re.compile(
88
+ # r'\{\s*"question":\s*"([^"]*)",\s*"options":\s*\{\s*"A":\s*"([^"]*)",\s*"B":\s*"([^"]*)",\s*"C":\s*"([^"]*)",\s*"D":\s*"([^"]*)"\s*\},'
89
+ # r'\s*"distractors":\s*\{\s*"E":\s*"([^"]*)",\s*"F":\s*"([^"]*)",\s*"G":\s*"([^"]*)"\s*\},\s*"correct_answer":\s*"([^"]*)"\s*\}',
90
+ # re.DOTALL
91
+ # )
92
+
93
+ # # ๅŒน้… JSON ็ป“ๆž„
94
+ # match = json_pattern.search(text)
95
+
96
+ # if match:
97
+ # # ๆ•่Žทๅˆฐ็š„ๅŒน้…็ป„
98
+ # question = match.group(1)
99
+ # option_a = match.group(2)
100
+ # option_b = match.group(3)
101
+ # option_c = match.group(4)
102
+ # option_d = match.group(5)
103
+ # distractor_e = match.group(6)
104
+ # distractor_f = match.group(7)
105
+ # distractor_g = match.group(8)
106
+ # correct_answer = match.group(9)
107
+
108
+ # # ๆž„ๅปบ JSON ๅฏน่ฑก
109
+ # json_data = {
110
+ # "question": question,
111
+ # "options": {
112
+ # "A": option_a,
113
+ # "B": option_b,
114
+ # "C": option_c,
115
+ # "D": option_d
116
+ # },
117
+ # "distractors": {
118
+ # "E": distractor_e,
119
+ # "F": distractor_f,
120
+ # "G": distractor_g
121
+ # },
122
+ # "correct_answer": correct_answer
123
+ # }
124
+
125
+ # return json_data
126
+ # else:
127
+ # print("No JSON object found in the text.")
128
+ # return None
129
+
130
+
131
def generate_distractors(model_name: str,
                         queries: list,
                         n: int = 1,
                         max_tokens: int = 4096):
    """Synchronously enrich each query dict with three generated distractors.

    Mutates the query dicts in place (option_5..option_7 and
    distractor_analysis) and returns the same list. Failed responses leave
    empty strings.

    Note: uses asyncio.run(), so it must NOT be called from inside a running
    event loop (use the async variant from Gradio handlers).
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs(queries)

    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n=n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )

    for query, new_options in zip(queries, responses):
        # One membership check instead of the original's four identical
        # `"distractors" in new_options` tests (one per field).
        distractors = new_options.get("distractors", {}) if new_options else {}
        query["option_5"] = distractors.get("E", "")
        query["option_6"] = distractors.get("F", "")
        query["option_7"] = distractors.get("G", "")
        query["distractor_analysis"] = distractors.get("analysis_of_distractors", "")

    return queries
+
175
+
176
+
177
+
178
+
179
+
utils/generate_translation.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from tqdm import tqdm
4
+ import os
5
+ import asyncio
6
+ from openai import AsyncOpenAI
7
+
8
+ from utils.api_utils import generate_from_openai_chat_completion, generate_from_claude_chat_completion
9
+
10
+
11
def construct_translate_prompt_textonly(question: str, options: list, text_only_analysis: str) -> str:
    """Build a prompt asking the model to translate a QA block into Chinese.

    Args:
        question: the question stem.
        options: all option texts (labelled A, B, C, ... in order).
        text_only_analysis: the analysis text to translate.

    Returns:
        A prompt whose expected model output is a JSON object with keys
        "QA" and "ToA".
    """
    optionized_list = [f"{chr(65 + i)}. {option}" for i, option in enumerate(options)]
    QA_str = question + "\n" + "\n".join(optionized_list)

    # FIX: the example JSON previously read `"ToA" "The translation..."` —
    # the colon was missing and there was a trailing comma, making the
    # template itself invalid JSON while instructing the model to emit
    # valid JSON. Both are corrected below.
    prompt = f"""
Please translate the following inputs into Chinese, ensuring they maintain a professional tone. If the input is empty, return an empty string.

Output the result in valid JSON format using the structure provided below. Be careful to avoid extra commas or missing quotation marks:
{{
    "QA": "The translation of QA str",
    "ToA": "The translation of text_only_analysis."
}}

Input:
QA: {QA_str}
text_only_analysis: {text_only_analysis}
"""

    return prompt
31
+
32
+
33
def prepare_q_text_input_translation(query, prompt_func=construct_translate_prompt_textonly):
    """Build the translation prompt for a single query dict (7 options)."""
    # All seven options (four originals plus generated E/F/G) are stored
    # under option_1..option_7.
    option_values = [query[f'option_{k}'] for k in range(1, 8)]
    return prompt_func(
        question=query['question'],
        options=option_values,
        text_only_analysis=query['text_only_example_response'],
    )
40
+
41
+
42
def prepare_q_inputs_translation(queries):
    """Wrap each query's translation prompt into a single-turn chat message list."""
    return [
        [{"role": "user", "content": prepare_q_text_input_translation(query)}]
        for query in queries
    ]
56
+
57
+
58
def generate_translation(model_name: str,
                         queries: list,
                         n: int = 1,
                         max_tokens: int = 2048):
    """Synchronously add Chinese translations to each query dict.

    Mutates the query dicts in place (QA_translation and
    text_only_example_response_translation) and returns the same list.
    Failed responses leave empty strings.

    Note: uses asyncio.run(), so it must NOT be called from inside a running
    event loop.
    """
    assert model_name in ["gpt-4o-mini", "gpt-4-turbo", "gpt-4o", "gpt-4o-2024-08-06"], "Invalid model name"

    client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"),
                         base_url="https://yanlp.zeabur.app/v1")
    messages = prepare_q_inputs_translation(queries)

    responses = asyncio.run(
        generate_from_openai_chat_completion(
            client,
            messages=messages,
            engine_name=model_name,
            n=n,
            max_tokens=max_tokens,
            requests_per_minute=30,
            json_format=True
        )
    )

    for query, new_options in zip(queries, responses):
        # One None-check instead of the original's two identical ones.
        payload = new_options or {}
        query["QA_translation"] = payload.get("QA", "")
        query["text_only_example_response_translation"] = payload.get("ToA", "")

    return queries
93
+
94
+
95
+
96
+
97
+
98
+