tensorkelechi committed
Commit 27558c9
1 Parent(s): be9b3bc

Reproduce Vidtext with distilled whisper

Files changed (1)
  1. app.py +4 -17
app.py CHANGED
@@ -4,25 +4,17 @@ from transformers import pipeline
 from pytube import YouTube
 from pydub import AudioSegment
 from audio_extract import extract_audio
-import google.generativeai as google_genai
-
 import os
 from dotenv import load_dotenv
 
-
-
 load_dotenv()
 
-GOOGLE_API_KEY =os.getenv("GOOGLE_API_KEY")
-
-google_genai.configure(api_key=GOOGLE_API_KEY)
-
 st.set_page_config(
-    page_title="VidText"
+    page_title="VidText_distilled"
 )
 
-st.title('Vidtext_whisper')
-st.write('A web app for video/audio transcription(Youtube, mp4, mp3)')
+st.title('Vidtext_distilwhisper')
+st.write('A web app for video/audio transcription(Youtube, mp4, mp3). Using distilled Whisper')
 
 
 def youtube_video_downloader(url):
@@ -57,7 +49,7 @@ def audio_processing(mp3_audio):
 
 @st.cache_resource
 def load_asr_model():
-    asr_model = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")
+    asr_model = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-large-v3")
     return asr_model
 
 transcriber_model = load_asr_model()
@@ -66,11 +58,6 @@ def transcriber_pass(processed_audio):
     text_extract = transcriber_model(processed_audio)
     return text_extract['text']
 
-def generate_ai_summary(transcript):
-    model = google_genai.GenerativeModel('gemini-pro')
-    model_response = model.generate_content([f"Give a summary of the text {transcript}"], stream=True)
-    return model_response.text
-
 
 
 # Streamlit UI
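
In short, this commit removes the google.generativeai summary helper and swaps the openai/whisper-small checkpoint for distil-whisper/distil-large-v3 in the transformers ASR pipeline. Below is a minimal sketch of how the updated transcription path can be exercised on its own; the chunking setting and the audio file name are illustrative assumptions, not part of app.py.

# Minimal sketch (not part of the commit): load the distil-whisper checkpoint the
# same way load_asr_model() does and transcribe a local audio file.
from transformers import pipeline

asr_model = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-large-v3",  # checkpoint introduced by this commit
    chunk_length_s=30,  # assumption: chunk long-form audio; app.py does not set this
)

result = asr_model("extracted_audio.mp3")  # hypothetical output of audio_processing()
print(result["text"])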