"""vidkit: Streamlit app that transcribes YouTube videos, mp4 files and mp3 files."""

import os
import time

# Keras reads KERAS_BACKEND when it is first imported, so the variable has to
# be in place before importing moonshine (which is expected to load Keras).
# Setting it after the import, as was done previously, has no effect.
os.environ["KERAS_BACKEND"] = "jax"

import moonshine  # noqa: E402
import streamlit as st  # noqa: E402
from pydub import AudioSegment  # noqa: E402
from pytube import YouTube  # noqa: E402

st.set_page_config(page_title="vidkit_v2")

st.title("vidkit")
st.write("App for video/audio transcription(Youtube, mp4, mp3)")
st.write("Built to solve a personal problem, might be useful to others too :)")


def youtube_video_downloader(url: str):
    """Download the highest-resolution progressive mp4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video.

    Returns:
        A ``(path, title)`` tuple: the value returned by the stream's
        ``download()`` call and the video title.

    Raises:
        ValueError: If the video exposes no progressive mp4 stream.
    """
    yt_vid = YouTube(url)
    stream = (
        yt_vid.streams.filter(progressive=True, file_extension="mp4")
        .order_by("resolution")
        .desc()
        .first()
    )
    if stream is None:
        # first() yields None on an empty query; fail with a readable message
        # instead of an AttributeError on None.download().
        raise ValueError("No progressive mp4 stream is available for this video")
    video_path = stream.download()
    return video_path, yt_vid.title


def audio_extraction(video_file):
    """Extract the audio track of an mp4 video into ``audio.wav``.

    Args:
        video_file: Path of the video, or a file-like object such as a
            Streamlit upload; it is handed straight to
            ``AudioSegment.from_file``.

    Returns:
        The path of the written wav file (``"audio.wav"``).
    """
    audio = AudioSegment.from_file(video_file, format="mp4")
    audio_path = "audio.wav"
    # export() hands back the open output file; close it so the data is
    # flushed to disk before the transcriber opens the path.
    audio.export(audio_path, format="wav").close()
    return audio_path


def audio_processing(mp3_audio):
    """Convert an mp3 recording into ``audio_file.wav``.

    Args:
        mp3_audio: Path of the mp3, or a file-like object such as a Streamlit
            upload; it is handed straight to ``AudioSegment.from_file``.

    Returns:
        The path of the written wav file (``"audio_file.wav"``).
    """
    audio = AudioSegment.from_file(mp3_audio, format="mp3")
    wav_file = "audio_file.wav"
    # Close the handle returned by export() so the file is complete on disk.
    audio.export(wav_file, format="wav").close()
    return wav_file


def transcriber_pass(processed_audio):
    """Transcribe a wav file with the ``moonshine/base`` model.

    The elapsed wall-clock time is reported on the page.

    Args:
        processed_audio: Path of the wav file to transcribe.

    Returns:
        The first element of the result returned by ``moonshine.transcribe``.
    """
    stime = time.time()

    # transcribe with moonshine
    text_extract = moonshine.transcribe(processed_audio, "moonshine/base")

    time_taken = time.time() - stime
    st.write(f'transcribed in {time_taken:.4f}s')

    return text_extract[0]


def _show_transcript(transcript):
    """Render a transcript on the page, prefixed with an arrow marker."""
    # NOTE(review): the text is rendered with unsafe_allow_html=True, as
    # before; confirm that raw HTML rendering is really wanted here.
    st.markdown(f"\n\n-> {transcript}\n\n", unsafe_allow_html=True)


# Streamlit UI
youtube_url_tab, file_select_tab, audio_file_tab = st.tabs(
    ["Youtube URL", "Video file", "Audio file"]
)

# Youtube URL transcription
with youtube_url_tab:
    url = st.text_input("Enter the Youtube url")

    try:
        # The button only appears once a URL is entered, and the download is
        # deferred until it is clicked: Streamlit reruns the whole script on
        # every interaction, so downloading up front would fail on the empty
        # initial URL and re-download the video on each rerun.
        if url and st.button("Transcribe", key="yturl"):
            with st.spinner("Transcribing..."):
                yt_video, title = youtube_video_downloader(url)
                with st.spinner("Extracting audio..."):
                    audio = audio_extraction(yt_video)

                ytvideo_transcript = transcriber_pass(audio)
            st.write(f"Video title: {title}")
            st.write("___")
            _show_transcript(ytvideo_transcript)
    except Exception as e:
        # Top-level UI boundary: show the failure on the page instead of
        # crashing the app.
        st.error(e)

# Video file transcription
with file_select_tab:
    uploaded_video_file = st.file_uploader("Upload video file", type="mp4")

    try:
        if uploaded_video_file and st.button("Transcribe", key="vidfile"):
            with st.spinner("Transcribing..."):
                with st.spinner("Extracting audio..."):
                    audio = audio_extraction(uploaded_video_file)

                video_transcript = transcriber_pass(audio)
            st.success("Transcription successful")
            _show_transcript(video_transcript)
    except Exception as e:
        st.error(e)

# Audio file transcription
with audio_file_tab:
    audio_file = st.file_uploader("Upload audio file", type="mp3")

    try:
        # ensure audio file is present
        if audio_file and st.button("Transcribe", key="audiofile"):
            with st.spinner("Transcribing..."):
                # extract audio/preprocess
                processed_audio = audio_processing(audio_file)
                audio_transcript = transcriber_pass(processed_audio)
            st.success("Transcription successful")
            _show_transcript(audio_transcript)
    except Exception as e:
        st.error(e)

# Footer
st.write("")
st.write("")
st.write("")

st.markdown("\nProject by tensor_kelechi\n", unsafe_allow_html=True)

# Arigato (thank you) :)