alankabisov committed on
Commit
e06c27b
•
1 Parent(s): 6c022f9

preview of ui

Browse files
Files changed (1) hide show
  1. app.py +92 -6
app.py CHANGED
@@ -1,6 +1,29 @@
 
 
 
1
  import streamlit as st
2
  from urllib.parse import urlparse, parse_qs
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  def get_videoid_from_url(url:str):
@@ -14,21 +37,84 @@ def get_videoid_from_url(url:str):
14
 
15
  return video_id
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
def main():
    """Render the Streamlit page: title, description, input field and button.

    The left column holds a text input and echoes the video id that
    ``get_videoid_from_url`` extracts from it; the right column holds the
    ``Process`` button (no callback is attached to it here).
    """
    st.title('YouTube Video Summary 📃')
    st.text('This app creates summary for given YouTube video based on transcripts.')

    col1, col2 = st.columns(2)

    with col1:
        video_id = st.text_input(
            'YouTube Video ID:',
            placeholder='Leave it empty if you want to use example video...',
        )
        # Example inputs used while developing:
        #   'aircAruvnKk'
        #   'https://www.youtube.com/watch?v=aircAruvnKk'
        st.write(get_videoid_from_url(video_id))

    with col2:
        st.button('Process')
 
 
 
 
 
32
 
33
 
34
 
 
1
+ import os
2
+
3
+
4
  import streamlit as st
5
  from urllib.parse import urlparse, parse_qs
6
 
7
+ from tqdm import tqdm
8
+ from stqdm import stqdm
9
+
10
+ # https://github.com/pytorch/pytorch/issues/77764
11
+ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
12
+
13
+ from youtube_transcript_api import YouTubeTranscriptApi
14
+
15
+ from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
16
+
17
+ import torch
18
+
19
# Select the PyTorch device once at import time: CUDA first, then Apple's
# Metal backend (MPS), otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
# `torch.has_mps` is deprecated; `torch.backends.mps.is_available()` is the
# supported check. The getattr guard keeps this working on builds that ship
# without the MPS backend module.
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
26
+
27
 
28
 
29
  def get_videoid_from_url(url:str):
 
37
 
38
  return video_id
39
 
40
# Constants of the chunk-size heuristic used below.
# NOTE(review): presumably ~400 tokens of input per summary at roughly
# 4 characters per token -- confirm against the model's limits.
_SUMMARY_BUDGET = 400
_LENGTH_DIVISOR = 4


def _subtitles_per_summary(mean_len):
    """Return how many subtitle lines are joined for one summary (always >= 1)."""
    if mean_len <= 0:
        # Only empty lines: avoid dividing by zero, take one line per step.
        return 1
    # Very long lines would otherwise round down to 0 and break the chunking.
    return max(1, int(_SUMMARY_BUDGET / (mean_len / _LENGTH_DIVISOR)))


def process_click_callback(video_id: str = 'aircAruvnKk'):
    """Fetch a video's English transcript, summarize it and publish the result.

    Intended as the ``on_click`` callback of the "Process" button. The joined
    summaries are stored in ``st.session_state.summary_output``.

    Args:
        video_id: YouTube video id to process. Defaults to the example video
            (3blue1Brown) that was previously hard-coded, so calling the
            callback without arguments behaves as before.
    """
    # The button's own key ('process_btn') is deliberately not written here:
    # Streamlit does not allow setting st.button state via st.session_state.

    # NOTE(review): `device` is only reported; it is not passed to the
    # pipeline created below -- confirm whether that is intended.
    print('Using {} device'.format(device))

    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

    try:
        transcript = transcript_list.find_manually_created_transcript(['en'])
    except Exception:
        # Best-effort fallback: any failure to get a manual transcript makes
        # us try the auto-generated one instead.
        print('No manual transcripts were found, trying to load generated ones...')
        transcript = transcript_list.find_generated_transcript(['en'])

    subtitles = [sbt['text'] for sbt in transcript.fetch() if sbt['text'] != '[Music]']

    if not subtitles:
        # Nothing to summarize; bail out before computing a mean over zero items.
        st.session_state.summary_output = ''
        st.warning('No subtitles were found for this video.')
        return

    sbt_mean_len = sum(len(sbt) for sbt in subtitles) / len(subtitles)

    print('Mean length of subtitles: {}'.format(sbt_mean_len))
    print(subtitles)
    print(len(subtitles))

    # Number of subtitles per step/summary. Line length differs between
    # generated and manual transcripts, so the step size is derived from the
    # mean line length rather than fixed.
    n_sbt_per_step = _subtitles_per_summary(sbt_mean_len)
    print('Number subtitles per summary: {}'.format(n_sbt_per_step))

    # Consecutive, non-overlapping groups; the last one may be shorter.
    chunks = [
        ' '.join(subtitles[start:start + n_sbt_per_step])
        for start in range(0, len(subtitles), n_sbt_per_step)
    ]

    # Build the pipeline once: constructing it per chunk reloads the model
    # on every iteration.
    summarizer = pipeline('summarization', model='t5-small', tokenizer='t5-small',
                          max_length=512, truncation=True)

    summaries = []
    for sbt_txt in stqdm(chunks):
        summary = summarizer(sbt_txt, do_sample=False)
        summaries.append(summary[0]['summary_text'])

    out = ' '.join(summaries)
    print(out)

    st.session_state.summary_output = out
    st.success('Processing complete!', icon="✅")
95
+
96
+
97
 
98
def main():
    """Render the Streamlit page.

    Shows the title, a short description with two example URLs, a URL input
    (echoing the id extracted by ``get_videoid_from_url``), the "Process"
    button wired to ``process_click_callback``, and a text area bound to the
    ``summary_output`` session-state key.
    """
    st.title('YouTube Video Summary 📃')
    st.markdown('Creates summary for given YouTube video URL based on transcripts.')
    st.code('https://www.youtube.com/watch?v=aircAruvnKk')
    st.code('https://youtu.be/p0G68ORc8uQ')

    col1, col2 = st.columns(2)

    with col1:
        video_url = st.text_input('YouTube Video URL:', placeholder='YouTube URL',
                                  label_visibility='collapsed')
        st.write(get_videoid_from_url(video_url))

    with col2:
        st.button('Process 📭', key='process_btn', on_click=process_click_callback)

    # Use a real label hidden via label_visibility (as the URL input does)
    # instead of an empty label, which Streamlit discourages.
    st.text_area(label='Summary', label_visibility='collapsed',
                 key='summary_output', height=444)
115
+
116
+
117
+
118
 
119
 
120