# youtube_test / app.py
from pytube import YouTube
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
import io
import openai
import os
import gradio as gr
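# Runtime assumptions (not enforced by the code below):
#   - pydub needs ffmpeg on the PATH to decode the downloaded MP4 audio.
#   - The Google Speech-to-Text client authenticates via the
#     GOOGLE_APPLICATION_CREDENTIALS environment variable (service-account JSON).
#   - textToSummary() reads the OpenAI key from the OPENAI_API_KEY environment variable.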
# Extract the YouTube video ID from a URL
def get_yt_video_id(url):
    from urllib.parse import urlparse, parse_qs
    if url.startswith(('youtu', 'www')):
        url = 'http://' + url
    query = urlparse(url)
    if query.hostname and 'youtube' in query.hostname:
        if query.path == '/watch':
            return parse_qs(query.query)['v'][0]
        elif query.path.startswith(('/embed/', '/v/')):
            return query.path.split('/')[2]
    elif query.hostname and 'youtu.be' in query.hostname:
        return query.path[1:]
    # Anything that did not match above is not a recognized YouTube URL
    raise ValueError("Not a valid YouTube link.")
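# Note: get_yt_video_id is a standalone helper; summarize() below passes the full URL
# straight to pytube, which accepts watch/share links directly, so this function is not
# currently called anywhere in the pipeline.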
# Download the audio track and convert it to WAV format
def download_and_convert_audio(youtube_url):
    yt = YouTube(youtube_url)
    stream = yt.streams.filter(only_audio=True).first()
    audio_path = stream.download(filename="audio.mp4")
    # Convert the audio to 16 kHz mono WAV to match the Speech-to-Text config below
    audio = AudioSegment.from_file(audio_path)
    wav_audio_path = "converted_audio.wav"
    audio.set_frame_rate(16000).set_channels(1).export(wav_audio_path, format="wav")
    return wav_audio_path
# Split the audio into fixed-length chunks (default: 60 seconds)
def split_audio(audio_path, chunk_length_ms=60000):
    audio = AudioSegment.from_wav(audio_path)
    chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
    return chunks
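# The default 60-second chunk keeps each request within the synchronous recognize()
# limit of roughly one minute of audio; longer clips would otherwise need
# long_running_recognize() with the audio uploaded to Cloud Storage.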
# Transcribe the audio with the Google Speech-to-Text API
def speech_to_text(audio_path):
    client = speech.SpeechClient()
    chunks = split_audio(audio_path)  # split the audio into chunks
    transcript = ""
    for chunk in chunks:
        with io.BytesIO() as audio_file:
            chunk.export(audio_file, format="wav")
            audio_file.seek(0)
            content = audio_file.read()
            audio = speech.RecognitionAudio(content=content)
            config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=16000,  # matches the 16 kHz WAV produced above
                language_code="ko-KR"     # Korean speech recognition
            )
            response = client.recognize(config=config, audio=audio)
            for result in response.results:
                transcript += result.alternatives[0].transcript + " "
    return transcript.strip()
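# Caveat: chunks are cut at fixed 60-second boundaries, so a word spanning a boundary
# may be dropped or misrecognized; a small overlap between chunks would mitigate this
# at the cost of occasional duplicated words.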
# ν…μŠ€νŠΈλ₯Ό μš”μ•½ν•˜λŠ” ν•¨μˆ˜ (OpenAI API μ‚¬μš©)
def textToSummary(text):
openai.api_key = os.getenv("OPENAI_API_KEY") # ν™˜κ²½ λ³€μˆ˜μ—μ„œ OpenAI API ν‚€ κ°€μ Έμ˜€κΈ°
response = openai.Completion.create(
model="text-davinci-003",
prompt="Summarize this in 200 words or less:\n\n" + text,
temperature=0.7,
max_tokens=400,
top_p=1.0,
frequency_penalty=0.0,
presence_penalty=1
)
return response["choices"][0]["text"].replace("\n", " ").strip()
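# Note: openai.Completion.create and text-davinci-003 belong to the legacy completions
# API; this call requires the pre-1.0 openai package, and the model has since been
# retired, so a current deployment would need the chat completions API instead.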
# Run the full summarization pipeline
def summarize(url):
    try:
        # Download the YouTube audio and convert it to WAV
        audio_path = download_and_convert_audio(url)
        # Transcribe the speech to text
        transcript = speech_to_text(audio_path)
        # Summarize the transcript
        summary = textToSummary(transcript)
        return summary
    except Exception as e:
        return f"Summarization failed: {str(e)}"
# Gradio interface setup
description = "Summarizes a YouTube video by transcribing its audio with speech recognition, even when the video has no captions."
gr.Interface(fn=summarize,
             inputs="text",
             outputs="textbox",
             description=description
             ).launch()
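# To run locally (assuming the environment variables above are set):
#   pip install pytube pydub google-cloud-speech gradio "openai<1"
#   python app.py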