Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,18 +10,19 @@ RAPIDAPI_HOST = "youtube-transcriptor.p.rapidapi.com"
|
|
10 |
|
11 |
# ์ ํ๋ธ URL์์ ๋น๋์ค ID๋ฅผ ์ถ์ถํ๋ ํจ์
|
12 |
def get_video_id(youtube_url):
|
13 |
-
# ์ผ๋ฐ์ ์ธ YouTube URL ๋๋ youtu.be ๋จ์ถ URL
|
14 |
video_id_match = re.search(r"(?<=v=)[^#&?]*", youtube_url) or re.search(r"(?<=youtu.be/)[^#&?]*", youtube_url)
|
15 |
return video_id_match.group(0) if video_id_match else None
|
16 |
|
17 |
# 자막 언어 우선순위 리스트
|
18 |
LANGUAGE_PRIORITY = ['ko', 'en', 'ja', 'zh']
|
19 |
|
20 |
-
# 유튜브 자막을 요청하는 함수
|
21 |
def get_youtube_transcript(youtube_url):
|
22 |
# ๋น๋์ค ID ์ถ์ถ
|
23 |
video_id = get_video_id(youtube_url)
|
24 |
if video_id is None:
|
|
|
25 |
return {"error": "์๋ชป๋ ์ ํ๋ธ URL์
๋๋ค. ๋น๋์ค ID๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."}
|
26 |
|
27 |
url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
|
@@ -34,21 +35,39 @@ def get_youtube_transcript(youtube_url):
|
|
34 |
# ์ธ์ด ์ฐ์ ์์์ ๋ฐ๋ผ ์์ฐจ์ ์ผ๋ก ์์ฒญ์ ์๋
|
35 |
for lang in LANGUAGE_PRIORITY:
|
36 |
querystring = {"video_id": video_id, "lang": lang}
|
|
|
|
|
37 |
response = requests.get(url, headers=headers, params=querystring)
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
if response.status_code == 200:
|
40 |
try:
|
|
|
41 |
data = response.json()
|
42 |
-
|
|
|
|
|
|
|
|
|
43 |
if "transcription" in data and isinstance(data["transcription"], list):
|
44 |
-
#
|
45 |
return {"language": lang, "transcription": data["transcription"]}
|
46 |
else:
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
51 |
# ์๋ง์ ์ฐพ์ง ๋ชปํ ๊ฒฝ์ฐ
|
|
|
52 |
return {"error": "์ฐ์ ์์ ์ธ์ด๋ก ์๋ง์ ์ฐพ์ ์ ์์ต๋๋ค."}
|
53 |
|
54 |
# Gradio 인터페이스 정의
|
@@ -56,7 +75,7 @@ def youtube_transcript_interface(youtube_url):
|
|
56 |
# ์๋ง ๋ฐ์ดํฐ ๊ฐ์ ธ์ค๊ธฐ
|
57 |
transcript_data = get_youtube_transcript(youtube_url)
|
58 |
|
59 |
-
#
|
60 |
return json.dumps(transcript_data, ensure_ascii=False, indent=2)
|
61 |
|
62 |
# Gradio 인터페이스 생성
|
|
|
10 |
|
11 |
# ์ ํ๋ธ URL์์ ๋น๋์ค ID๋ฅผ ์ถ์ถํ๋ ํจ์
|
12 |
def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Supports regular URLs that carry the ID in a ``v=`` query parameter and
    ``youtu.be`` short links.

    Args:
        youtube_url: The URL string to inspect.

    Returns:
        The video ID as a string, or None when the input is not a string or
        no non-empty ID can be found.
    """
    # A non-string (e.g. None from an empty input field) would make
    # re.search raise TypeError; report it as "no ID found" instead.
    if not isinstance(youtube_url, str):
        return None

    # Use `+` rather than `*`: a bare "v=" would otherwise yield an empty
    # match, and because Match objects are always truthy it would both return
    # "" as the ID and prevent the youtu.be fallback from being tried.
    video_id_match = re.search(r"(?<=v=)[^#&?]+", youtube_url) or re.search(
        # The dot is escaped so only the literal host "youtu.be" matches.
        r"(?<=youtu\.be/)[^#&?]+",
        youtube_url,
    )
    return video_id_match.group(0) if video_id_match else None
|
16 |
|
17 |
# 자막 언어 우선순위 리스트
|
18 |
LANGUAGE_PRIORITY = ['ko', 'en', 'ja', 'zh']
|
19 |
|
20 |
+
# 유튜브 자막을 요청하는 함수 (언어 우선순위를 적용하여 시도)
|
21 |
def get_youtube_transcript(youtube_url):
|
22 |
# ๋น๋์ค ID ์ถ์ถ
|
23 |
video_id = get_video_id(youtube_url)
|
24 |
if video_id is None:
|
25 |
+
print("ERROR: ์๋ชป๋ ์ ํ๋ธ URL์
๋๋ค. ๋น๋์ค ID๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
26 |
return {"error": "์๋ชป๋ ์ ํ๋ธ URL์
๋๋ค. ๋น๋์ค ID๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."}
|
27 |
|
28 |
url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
|
|
|
35 |
# ์ธ์ด ์ฐ์ ์์์ ๋ฐ๋ผ ์์ฐจ์ ์ผ๋ก ์์ฒญ์ ์๋
|
36 |
for lang in LANGUAGE_PRIORITY:
|
37 |
querystring = {"video_id": video_id, "lang": lang}
|
38 |
+
print(f"์์ฒญ ์ค: {lang} ์ธ์ด์ ๋ํ ์๋ง ์์ฒญ")
|
39 |
+
|
40 |
response = requests.get(url, headers=headers, params=querystring)
|
41 |
|
42 |
+
# ์ํ ์ฝ๋ ์ถ๋ ฅ
|
43 |
+
print(f"Response status code: {response.status_code}")
|
44 |
+
|
45 |
+
# ์์ ์๋ต ๋ฐ์ดํฐ๋ฅผ ์ถ๋ ฅ
|
46 |
+
print("Raw response content:", response.text)
|
47 |
+
|
48 |
+
# ์ํ ์ฝ๋๊ฐ 200์ผ ๋๋ง ์ฒ๋ฆฌ
|
49 |
if response.status_code == 200:
|
50 |
try:
|
51 |
+
# JSON์ผ๋ก ์๋ต์ ํ์ฑ
|
52 |
data = response.json()
|
53 |
+
|
54 |
+
# ์๋ต ๋ฐ์ดํฐ ์ถ๋ ฅ
|
55 |
+
print("Parsed JSON response:", json.dumps(data, indent=2, ensure_ascii=False))
|
56 |
+
|
57 |
+
# transcription ํ๋๋ฅผ ํตํด ์๋ง ๋ฐ์ดํฐ๊ฐ ์กด์ฌํ๋์ง ํ์ธ
|
58 |
if "transcription" in data and isinstance(data["transcription"], list):
|
59 |
+
# ์๋ง ๋ฐ์ดํฐ๋ฅผ JSON ๋ฆฌ์คํธ๋ก ๋ฐํ
|
60 |
return {"language": lang, "transcription": data["transcription"]}
|
61 |
else:
|
62 |
+
print(f"ERROR: ์๋ง ๋ฐ์ดํฐ๋ฅผ {lang} ์ธ์ด๋ก ์ฐพ์ ์ ์์ต๋๋ค.")
|
63 |
+
return {"error": f"{lang} ์ธ์ด๋ก ์๋ง ๋ฐ์ดํฐ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."}
|
64 |
+
|
65 |
+
except json.JSONDecodeError as e:
|
66 |
+
print(f"ERROR: JSON ๋์ฝ๋ฉ ์ค๋ฅ ๋ฐ์: {str(e)}")
|
67 |
+
return {"error": f"JSON ๋์ฝ๋ฉ ์ค๋ฅ ๋ฐ์: {str(e)}"}
|
68 |
+
|
69 |
# ์๋ง์ ์ฐพ์ง ๋ชปํ ๊ฒฝ์ฐ
|
70 |
+
print("ERROR: ์ฐ์ ์์ ์ธ์ด๋ก ์๋ง์ ์ฐพ์ ์ ์์ต๋๋ค.")
|
71 |
return {"error": "์ฐ์ ์์ ์ธ์ด๋ก ์๋ง์ ์ฐพ์ ์ ์์ต๋๋ค."}
|
72 |
|
73 |
# Gradio 인터페이스 정의
|
|
|
75 |
# ์๋ง ๋ฐ์ดํฐ ๊ฐ์ ธ์ค๊ธฐ
|
76 |
transcript_data = get_youtube_transcript(youtube_url)
|
77 |
|
78 |
+
# ๊ฒฐ๊ณผ ์ถ๋ ฅ
|
79 |
return json.dumps(transcript_data, ensure_ascii=False, indent=2)
|
80 |
|
81 |
# Gradio 인터페이스 생성
|