Commit: sync with github

Files changed:

- README.md (+17 -1)
- annotator/__init__.py (+0 -0)
- annotator/utils.py (+10 -13)
- app.py (+43 -27)
- dev-requirements.txt (+1 -0)
- requirements.txt (+5 -3)
README.md
CHANGED
@@ -9,4 +9,20 @@ app_file: app.py
 pinned: false
 ---
 
-
+## Try the app on HF Spaces
+
+You can find the deployed app → [**here**](https://huggingface.co/spaces/deven367/yt-video-annotator/)
+
+> **Note**
+> Inference is slow because the hosted app runs on a CPU; if you have a GPU on your local system, the app will run much faster.
+
+## Installation
+
+1. Create a virtual env with the environment manager of your choice
+2. Activate the environment
+3. Install the dependencies using `pip install -e .`
+4. To run the app locally, type `run_app` in your terminal
+
+## Contributing
+
+Issues and PRs are welcome. If you want me to implement a feature, create a Feature Request in the issues and I'll try my best to implement it.
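The CPU/GPU note above comes down to where the Whisper model runs. `annotate()` in `annotator/utils.py` is not shown in this diff, but if it loads a model via `openai-whisper`, device selection typically looks like the sketch below; the model size `"base"` and the input path are assumptions, not something this commit pins down.

```python
# Hedged sketch of CPU/GPU device selection for Whisper; annotate() itself is
# not part of this diff, so this is illustrative rather than the app's code.
import torch
import whisper

device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)   # "base" is an assumed size
result = model.transcribe("audio/example.wav")      # hypothetical input file
print(result["text"])
```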
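Installation step 4 relies on a `run_app` console script. The packaging config is not part of this commit, but given `start_app()` in `annotator/utils.py`, a minimal `setup.py` wiring it up could look like this hypothetical stub:

```python
# Hypothetical packaging stub; the repo's real setup.py/pyproject.toml is not
# included in this diff, so the names here are assumptions.
from setuptools import find_packages, setup

setup(
    name="yt-video-annotator",  # assumed distribution name
    packages=find_packages(),
    entry_points={
        "console_scripts": [
            "run_app = annotator.utils:start_app",  # start_app is defined in utils.py
        ],
    },
)
```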
annotator/__init__.py
ADDED
Empty file (marks `annotator` as a Python package)
annotator/utils.py
CHANGED
@@ -1,11 +1,12 @@
-import whisper
 import datetime
-import pandas as pd
-import numpy as np
 import subprocess
-from fastcore.foundation import working_directory, L
 from pathlib import Path
+
+import numpy as np
+import pandas as pd
 import torch
+import whisper
+from fastcore.foundation import L, working_directory
 
 
 def start_app():
@@ -16,7 +17,7 @@ def get_audio(url: str):
     audio_path = Path("./audio")
     with working_directory(audio_path):
         # subprocess.run(['youtube-dl', '-F', 'bestaudio[ext=m4a]', url])
-        subprocess.run(["…
+        subprocess.run(["yt-dlp", "-x", "--audio-format", "wav", url])
 
 def get_v_from_url(url):
     _, val = url.split('?v=')
@@ -36,14 +37,10 @@ def get_time(seconds):
 
 
 def df_from_result(result):
-    …
-    …
-    …
-    …
-        return df
-    except:
-        return None
-
+    df = pd.json_normalize(result["segments"])
+    df["start"] = df["start"].apply(get_time)
+    df["end"] = df["end"].apply(get_time)
+    return df
 
 
 def find_word_timestamp(df, *words):
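The download now shells out to `yt-dlp` (pinned in `requirements.txt`) instead of the commented-out `youtube-dl` call. In isolation the step looks like the sketch below; in `get_audio()` it runs inside `./audio` via fastcore's `working_directory` context manager. The `check=True` flag is an addition of this sketch, not of the commit; it surfaces download failures as exceptions.

```python
# Minimal sketch of the download step; assumes the yt-dlp binary is on PATH.
import subprocess

def download_audio(url: str) -> None:
    # -x extracts audio only; --audio-format wav re-encodes the result to WAV
    subprocess.run(["yt-dlp", "-x", "--audio-format", "wav", url], check=True)

download_audio("https://www.youtube.com/watch?v=C0DPdy98e4c")  # demo URL from app.py
```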
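The rewritten `df_from_result()` drops the old try/except and flattens Whisper's segment list directly: each entry of `result["segments"]` is a dict with `start`, `end`, and `text` keys (among others), so `pd.json_normalize` yields one row per segment, and `get_time` (defined earlier in the file; its body is not shown in this diff) converts the float second offsets into readable timestamps. A self-contained illustration with fabricated segments and an assumed `get_time`:

```python
# Sample segments are fabricated; get_time() here is a stand-in for the
# utils.py helper, assumed to format seconds as H:MM:SS.
import datetime
import pandas as pd

def get_time(seconds: float) -> str:
    return str(datetime.timedelta(seconds=round(seconds)))

result = {"segments": [
    {"id": 0, "start": 0.0, "end": 3.5, "text": " hello there"},
    {"id": 1, "start": 3.5, "end": 7.25, "text": " general kenobi"},
]}

df = pd.json_normalize(result["segments"])
df["start"] = df["start"].apply(get_time)
df["end"] = df["end"].apply(get_time)
print(df[["start", "end", "text"]])
```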
app.py
CHANGED
@@ -1,54 +1,70 @@
+from pathlib import Path
+
+import numpy as np
 import streamlit as st
-from annotator.utils import *
-st.set_page_config(layout='wide')
 from fastcore.xtras import globtastic
-from pathlib import Path
-import subprocess
 
-
-
+from annotator.utils import (
+    annotate,
+    df_from_result,
+    find_word_timestamp,
+    generate_srt,
+    get_audio,
+    get_v_from_url,
+    working_directory,
+    write_srt,
+)
+
+st.set_page_config(layout="wide")
+
 
-
-if not …
+SRT_PATH = Path("srt")
+if not SRT_PATH.exists():
+    SRT_PATH.mkdir()
+
+AUDIO_PATH = Path("./audio")
+if not AUDIO_PATH.exists():
+    AUDIO_PATH.mkdir()
 
 
 def make_sidebar():
     with st.sidebar:
-        st.markdown(…
-        st.write(…
+        st.markdown("## yt-video-annotator")
+        st.write("Link to the GitHub repo")
+
 
-@st.…
+@st.cache_resource
 def caption_from_url(url):
     audio_src = get_audio(url)
     v = get_v_from_url(url)
-    audio_src = globtastic(AUDIO_PATH, file_glob=…
+    audio_src = globtastic(AUDIO_PATH, file_glob="*.mp3", file_re=v)[0]
     result = annotate(audio_src)
    df = df_from_result(result)
     return audio_src, df
 
 
-
-
 def main():
     url, name = None, None
     make_sidebar()
-    place = …
+    place = "https://www.youtube.com/watch?v=C0DPdy98e4c&ab_channel=SimonYapp"
     col1, col2 = st.columns([1.2, 1])
     with col1:
-        url = st.text_input(…
+        url = st.text_input("Enter URL for the YT video", place)
        st.video(url)
 
     with col2:
-        default_opt = …
-        opt = st.radio(…
+        default_opt = "Search for words"
+        opt = st.radio(
+            "What do you wish to do?",
+            [default_opt, "Generate subtitles for the entire video"],
+        )
        if opt == default_opt:
-           st.markdown(…
-           words = st.text_input(…
-           words = words.split(…
+           st.markdown("### Search for words in the video")
+           words = st.text_input("Enter words separated by a comma")
+           words = words.split(",")
 
-           if st.button(…
+           if st.button("Get Timestamps"):
               audio_src, df = caption_from_url(url)
-              st.write(df)
               times = find_word_timestamp(df, *words)
               times = np.asarray(times).reshape(len(words), -1)
               # st.write(times)
@@ -56,7 +72,7 @@ def main():
                  st.write(f"{word} is said on {times[i].flatten()} timestamp")
 
        else:
-           if st.button(…
+           if st.button("Generate SRT"):
               audio_src, df = caption_from_url(url)
               name = Path(audio_src).stem
               s = generate_srt(df)
@@ -66,13 +82,13 @@ def main():
     if name is not None:
         with working_directory(SRT_PATH):
             key = get_v_from_url(url)
-            srt = globtastic(…
+            srt = globtastic(".", file_glob="*.srt", file_re=key)[0]
             with open(srt) as f:
-                st.download_button(…
+                st.download_button("Download SRT", f, file_name=f"{name}.srt")
 
     # subprocess.run(['rm', '-rf', 'audio'])
     # subprocess.run(['rm', '-rf', 'srt'])
 
 
 if __name__ == "__main__":
-    main()
+    main()
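The new `@st.cache_resource` decorator on `caption_from_url` matters because Streamlit re-executes the whole script on every widget interaction; without caching, each button press would re-download the audio and re-run Whisper. A toy demonstration of the semantics:

```python
# Toy demonstration of st.cache_resource; slow_lookup() is a stand-in for the
# app's download-plus-transcribe pipeline, not code from this commit.
import time

import streamlit as st

@st.cache_resource
def slow_lookup(url: str) -> str:
    time.sleep(5)  # pretend this is yt-dlp + Whisper inference
    return f"transcript for {url}"

st.write(slow_lookup("https://example.com/?v=abc"))  # slow on the first run only
```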
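One detail worth double-checking: `get_audio()` now writes WAV (`--audio-format wav`) while `caption_from_url` globs for `*.mp3`, so those two settings may need to agree. The lookup itself uses fastcore's `globtastic`, which filters paths by glob and regex and returns a list-like `L`; roughly:

```python
# Sketch of the path lookup; "C0DPdy98e4c" (the video id from the demo URL in
# app.py) stands in for the value returned by get_v_from_url().
from fastcore.xtras import globtastic

matches = globtastic("audio", file_glob="*.mp3", file_re="C0DPdy98e4c")
audio_src = matches[0] if matches else None  # caption_from_url indexes [0] directly
print(audio_src)
```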
dev-requirements.txt
ADDED
@@ -0,0 +1 @@
+black==23.10.0
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
-…
-…
-…
+fastcore==1.5.29
+yt-dlp==2023.10.13
+openai-whisper==20230314
+streamlit==1.25.0
+