import os
import tempfile

import moviepy.editor as mp
import speech_recognition as sr


def transcribe_audio_chunk(audio_chunk):
    """Return the recognised speech of one audio file as text.

    The file is opened with ``sr.AudioFile``, read in full, and passed to
    ``Recognizer.recognize_google`` with the language fixed to ``'en-US'``.

    Args:
        audio_chunk: Audio source handed to ``sr.AudioFile`` (the caller in
            this file passes the path of a ``.wav`` file).

    Returns:
        The recognised text, or an empty string when the audio could not be
        understood or the recognition request failed. In both failure cases
        a message is printed instead of raising.
    """
    engine = sr.Recognizer()

    # Load the whole file into memory before contacting the service.
    with sr.AudioFile(audio_chunk) as wav_source:
        samples = engine.record(wav_source)

    try:
        transcript = engine.recognize_google(samples, language='en-US')
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        transcript = ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        transcript = ""

    return transcript


def transcribe_video(video_path, chunk_duration=10):
    """Transcribe the speech in a video file, print it and return it.

    The audio track is cut into consecutive chunks of ``chunk_duration``
    seconds, each chunk is written to a temporary WAV file and passed to
    ``transcribe_audio_chunk``, and the non-empty results are joined with
    single spaces.

    Args:
        video_path: Path of the video file to open with MoviePy.
        chunk_duration: Length of each audio chunk in seconds. Must be
            positive. Defaults to 10.

    Returns:
        The joined transcription. An empty string is returned when the
        video has no audio track or nothing was recognised.

    Raises:
        ValueError: If ``chunk_duration`` is not positive.
    """
    if chunk_duration <= 0:
        raise ValueError("chunk_duration must be a positive number of seconds")

    video_clip = mp.VideoFileClip(video_path)
    try:
        audio_clip = video_clip.audio
        # MoviePy leaves ``audio`` as None for files without an audio track.
        if audio_clip is None:
            print("Video has no audio track; nothing to transcribe")
            return ""
        try:
            transcribed_texts = _transcribe_in_chunks(audio_clip, chunk_duration)
        finally:
            audio_clip.close()
    finally:
        # Always release the underlying readers, even if transcription failed.
        video_clip.close()

    # Chunks that produced no text are skipped so the result has no stray
    # double spaces.
    final_transcription = " ".join(text for text in transcribed_texts if text)
    print("Transcription:\n", final_transcription)
    return final_transcription


def _transcribe_in_chunks(audio_clip, chunk_duration):
    """Write ``audio_clip`` to temporary WAV chunks and transcribe each in order."""
    total_duration = audio_clip.duration
    texts = []

    # A private temporary directory avoids name clashes in the working
    # directory and is removed even when an exception is raised.
    with tempfile.TemporaryDirectory() as temp_dir:
        index = 0
        # Looping on the real (possibly fractional) duration keeps the final
        # partial chunk, which truncating the duration to an int would drop.
        while index * chunk_duration < total_duration:
            start_time = index * chunk_duration
            end_time = min(start_time + chunk_duration, total_duration)
            chunk_path = os.path.join(temp_dir, f"chunk_{index}.wav")
            audio_clip.subclip(start_time, end_time).write_audiofile(chunk_path)
            texts.append(transcribe_audio_chunk(chunk_path))
            index += 1

    return texts


# Default input used when this file is run directly as a script.
video_path = "C:/Users/HP/Downloads/Video/1.mp4"

if __name__ == "__main__":
    # Guarded so that importing this module does not start a transcription.
    transcribe_video(video_path)