# YouTube video summarizer (Hugging Face Space):
# transcribes the audio with Google Speech-to-Text and summarizes it with OpenAI.
import io
import os

import gradio as gr
import openai
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
from pytube import YouTube
# Extract the video ID from a YouTube URL.
def get_yt_video_id(url):
    """Extract the video ID from a YouTube URL.

    Supports watch URLs (youtube.com/watch?v=ID), /embed/ and /v/ paths,
    and short youtu.be links.

    Raises:
        ValueError: if the URL is not a recognizable YouTube link.
    """
    from urllib.parse import urlparse, parse_qs
    # URLs pasted without a scheme (e.g. "youtu.be/ID" or "www.youtube.com/...")
    # parse with hostname=None, so prepend one first.
    if url.startswith(('youtu', 'www')):
        url = 'http://' + url
    query = urlparse(url)
    hostname = query.hostname or ''  # hostname is None for scheme-less input
    if 'youtube' in hostname:
        if query.path == '/watch':
            return parse_qs(query.query)['v'][0]
        if query.path.startswith(('/embed/', '/v/')):
            return query.path.split('/')[2]
    elif 'youtu.be' in hostname:
        return query.path[1:]
    # Previously a youtube hostname with an unknown path fell through and
    # returned None; fail loudly instead so callers see a clear error.
    raise ValueError("Not a valid YouTube link.")
# Download the audio track and convert it to WAV.
def download_and_convert_audio(youtube_url):
    """Download a YouTube video's audio track and convert it to mono
    16 kHz WAV (the format speech_to_text expects).

    Returns:
        str: path of the converted WAV file ("converted_audio.wav").

    Raises:
        ValueError: if the video exposes no audio-only stream.
    """
    yt = YouTube(youtube_url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # .first() returns None when no audio stream exists; fail with a
        # clear message instead of an AttributeError below.
        raise ValueError("No audio-only stream available for this video.")
    audio_path = stream.download(filename="audio.mp4")
    # Convert to 16000 Hz mono WAV.
    audio = AudioSegment.from_file(audio_path)
    wav_audio_path = "converted_audio.wav"
    audio.set_frame_rate(16000).set_channels(1).export(wav_audio_path, format="wav")
    # Best-effort cleanup of the intermediate download so repeated runs
    # don't accumulate files; the WAV result is what matters.
    try:
        os.remove(audio_path)
    except OSError:
        pass
    return wav_audio_path
# Split audio into fixed-size chunks.
def split_audio(audio_path, chunk_length_ms=60000):
    """Split a WAV file into consecutive chunks.

    Each chunk is at most chunk_length_ms milliseconds long (default one
    minute); the final chunk may be shorter.

    Returns:
        list: pydub AudioSegment chunks, in order.
    """
    audio = AudioSegment.from_wav(audio_path)
    total_ms = len(audio)
    chunks = []
    start = 0
    while start < total_ms:
        chunks.append(audio[start:start + chunk_length_ms])
        start += chunk_length_ms
    return chunks
# Transcribe audio to text with the Google Speech-to-Text API.
def speech_to_text(audio_path):
    """Transcribe a mono 16 kHz WAV file to Korean text.

    The audio is split into one-minute chunks so each synchronous
    recognize() request stays small; the partial transcripts are joined
    with spaces.

    Returns:
        str: the concatenated transcript (may be empty).
    """
    client = speech.SpeechClient()
    # The recognition config is identical for every chunk, so build it
    # once instead of once per iteration.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,  # matches the 16 kHz conversion upstream
        language_code="ko-KR"  # Korean
    )
    chunks = split_audio(audio_path)  # split the audio into chunks
    transcript = ""
    for chunk in chunks:
        # Export the chunk to an in-memory WAV buffer.
        with io.BytesIO() as audio_file:
            chunk.export(audio_file, format="wav")
            audio_file.seek(0)
            content = audio_file.read()
        audio = speech.RecognitionAudio(content=content)
        response = client.recognize(config=config, audio=audio)
        for result in response.results:
            transcript += result.alternatives[0].transcript + " "
    return transcript.strip()
# Summarize text with the OpenAI API.
def textToSummary(text):
    """Summarize *text* in at most 200 words via the OpenAI completion API.

    Returns:
        str: the summary, with newlines flattened to spaces.
    """
    # Read the key from the environment on every call so a rotated key
    # takes effect without a restart.
    openai.api_key = os.getenv("OPENAI_API_KEY")
    # NOTE(review): text-davinci-003 and the Completion endpoint are
    # deprecated by OpenAI; consider migrating to the chat API.
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt="Summarize this in 200 words or less:\n\n" + text,
        temperature=0.7,
        max_tokens=400,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=1
    )
    summary = completion["choices"][0]["text"]
    return summary.replace("\n", " ").strip()
# Run the whole summarization pipeline for one URL.
def summarize(url):
    """Download, transcribe, and summarize a YouTube video.

    Pipeline: download/convert the audio, transcribe it to text, then
    summarize the transcript.

    Returns:
        str: the summary, or an error message if any stage fails (the
        Gradio UI expects a plain string, so failures are reported inline
        instead of raised).
    """
    try:
        # Download and convert the YouTube audio.
        audio_path = download_and_convert_audio(url)
        # Transcribe speech to text.
        transcript = speech_to_text(audio_path)
        # Summarize the transcript.
        return textToSummary(transcript)
    except Exception as e:
        return f"Summarization failed: {str(e)}"
# Gradio interface: a text box for the YouTube URL in, summary text out.
# Launching at import time is intentional for a Hugging Face Space.
description = "Summarizes a YouTube video using speech recognition, even when it has no captions."
gr.Interface(fn=summarize,
             inputs="text",
             outputs="textbox",
             description=description
             ).launch()