# chatPDF / video_transcriber.py
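"""Transcribe the audio track of a video file.

The script extracts the audio with moviepy, splits it into fixed-length
chunks, sends each chunk to the Google Web Speech API through the
SpeechRecognition library, and prints the concatenated transcription.

Dependencies (PyPI package names): moviepy, SpeechRecognition.
"""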
import os

import moviepy.editor as mp
import speech_recognition as sr


def transcribe_audio_chunk(audio_chunk):
    """Transcribe a single WAV chunk with the Google Web Speech API."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_chunk) as source:
        audio_data = recognizer.record(source)
    try:
        text_result = recognizer.recognize_google(audio_data, language='en-US')
        return text_result
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return ""

def transcribe_video(video_path):
    """Extract the audio from a video, transcribe it in chunks, and return the text."""
    # Step 1: Extract audio from the video
    video_clip = mp.VideoFileClip(video_path)
    audio_clip = video_clip.audio
    audio_clip.write_audiofile("temp_audio.wav")

    # Step 2: Split the audio into smaller chunks (e.g., 10 seconds each)
    chunk_duration = 10  # in seconds
    total_duration = audio_clip.duration
    chunk_paths = []
    for start_time in range(0, int(total_duration), chunk_duration):
        end_time = min(start_time + chunk_duration, total_duration)
        chunk_path = f"temp_audio_chunk_{start_time}_{end_time}.wav"
        audio_chunk = audio_clip.subclip(start_time, end_time)
        audio_chunk.write_audiofile(chunk_path)
        chunk_paths.append(chunk_path)

    # Step 3: Transcribe each audio chunk
    transcribed_texts = []
    for chunk_path in chunk_paths:
        text_result = transcribe_audio_chunk(chunk_path)
        transcribed_texts.append(text_result)

    # Step 4: Concatenate the transcribed texts
    final_transcription = " ".join(transcribed_texts)
    print("Transcription:\n", final_transcription)

    # Clean up temporary files
    audio_clip.close()
    video_clip.close()
    os.remove("temp_audio.wav")
    for chunk_path in chunk_paths:
        os.remove(chunk_path)

    return final_transcription

# Example usage
if __name__ == "__main__":
    video_path = "C:/Users/HP/Downloads/Video/1.mp4"
    transcribe_video(video_path)
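# Sketch of an alternative entry point that keeps the result instead of only
# printing it (relies on transcribe_video() returning the final transcription,
# as above; "transcription.txt" is a hypothetical output path):
#
#     transcription = transcribe_video(video_path)
#     with open("transcription.txt", "w", encoding="utf-8") as f:
#         f.write(transcription)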