shreyasiv committed on
Commit
ad76407
·
verified ·
1 Parent(s): 0f0c871

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -0
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import streamlit as st
3
+ import tempfile
4
+ import base64
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from openai import OpenAI
8
+ import assemblyai as aai
9
+ from moviepy.editor import *
10
+
11
+
12
+
13
+
14
# Load environment variables (API keys are expected in the process
# environment or in a local .env file).
load_dotenv()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

# Hand the key to the client instance itself. Assigning it to the OpenAI
# class does not configure clients; the constructor argument does, and it
# falls back to the OPENAI_API_KEY environment variable when given None.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
19
+
20
def main():
    """Render the Streamlit page: upload a video, preview frames, run analyses.

    The uploaded video is written to a temporary file so that OpenCV and
    AssemblyAI can read it by path. Every 30th frame is shown in a grid, and
    four buttons trigger (and cache in ``st.session_state``) a description,
    a per-frame description, a transcript and a category. Cached results are
    rendered below the buttons.
    """
    st.title('Insightly Video Content Moderation')

    # Video upload section
    uploaded_video = st.file_uploader('Upload a video', type=["mp4", "avi", "mov"])
    if uploaded_video is None:
        return

    # Results cached for a previous upload must not be shown for this one.
    _reset_results_if_new_video(uploaded_video)

    # Save the video to a temp file. getvalue() returns the full buffer
    # regardless of the current stream position, unlike read().
    with tempfile.NamedTemporaryFile(delete=False) as tfile:
        tfile.write(uploaded_video.getvalue())
        video_file_path = tfile.name

    try:
        # Process the video and display frames in a grid layout
        base64_frames = video_to_base64_frames(video_file_path)
        display_frame_grid(base64_frames[::30])  # every 30th frame

        st.write("Actions:")  # Header for the actions/buttons section

        # Four columns so the buttons sit side by side
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            if st.button("Description") and 'description' not in st.session_state:
                st.session_state['description'] = generate_description(base64_frames)

        with col2:
            if st.button("Frame Description") and 'frame_description' not in st.session_state:
                st.session_state['frame_description'] = generate_frame_description(base64_frames)

        with col3:
            # Transcribe only on request: the script reruns on every widget
            # interaction, so an unconditional call would hit the
            # transcription service on each click.
            if st.button("Generate Transcript") and 'transcript' not in st.session_state:
                transcript_text = _transcribe(video_file_path)
                if transcript_text is not None:
                    st.session_state['transcript'] = transcript_text

        with col4:
            if st.button("Category of Video") and 'category' not in st.session_state:
                st.session_state['category'] = generate_category(base64_frames)
    finally:
        # The temp file was created with delete=False, so remove it here;
        # otherwise every rerun leaves another copy of the video on disk.
        os.remove(video_file_path)

    # If any value exists in session state then display it
    for key, heading in _RESULT_SECTIONS:
        if key in st.session_state and st.session_state[key]:
            st.subheader(heading)
            st.write(st.session_state[key])


# Session-state key -> heading shown above the cached result.
_RESULT_SECTIONS = (
    ('description', "Video Description"),
    ('frame_description', "Frame Description"),
    ('transcript', "Video Transcript"),
    ('category', "Video Category"),
)

# Session-state key remembering which upload the cached results belong to.
_CURRENT_VIDEO_KEY = '_current_video'


def _reset_results_if_new_video(uploaded_video):
    """Drop cached results when the uploaded file differs from the last one."""
    video_id = (uploaded_video.name, uploaded_video.size)
    if (_CURRENT_VIDEO_KEY in st.session_state
            and st.session_state[_CURRENT_VIDEO_KEY] == video_id):
        return
    for key, _heading in _RESULT_SECTIONS:
        if key in st.session_state:
            del st.session_state[key]
    st.session_state[_CURRENT_VIDEO_KEY] = video_id


def _transcribe(video_file_path):
    """Return the transcript text for the file, or None if transcription failed."""
    transcript = aai.Transcriber().transcribe(video_file_path)
    if transcript.status == aai.TranscriptStatus.error:
        st.error(f"Transcription failed: {transcript.error}")
        return None
    return transcript.text
77
+
78
+
79
+
80
+
81
+
82
+
83
def video_to_base64_frames(video_file_path):
    """Decode every frame of a video and return them as base64 JPEG strings.

    Args:
        video_file_path: Path of the video file to open with OpenCV.

    Returns:
        A list with one base64-encoded (UTF-8 ``str``) JPEG per frame, in
        playback order. The list is empty if no frame could be read.
    """
    video = cv2.VideoCapture(video_file_path)
    base64_frames = []

    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                # End of stream (or an unreadable file): stop decoding.
                break

            _, buffer = cv2.imencode('.jpg', frame)
            base64_frames.append(base64.b64encode(buffer).decode('utf-8'))
    finally:
        # Release the capture handle even if decoding or encoding raises.
        video.release()

    return base64_frames
99
+
100
+ #########################################
101
+ #Generate Video description
102
def generate_description(base64_frames):
    """Ask the vision model for a short description of the video.

    Args:
        base64_frames: Base64-encoded frames of the video; every 30th one
            is attached to the request.

    Returns:
        The text content of the first choice in the model's response.
    """
    # Implicit string concatenation instead of a backslash continuation
    # inside the literal, which would embed the source indentation of the
    # following line in the prompt text.
    prompt = (
        "1. Generate a description for this sequence of video frames in about 90 words. "
        "Return the following : 1. List of objects in the video 2. Any restrictive content or sensitive content and if so which frame."
    )
    sampled_frames = [
        {"image": frame, "resize": 428} for frame in base64_frames[0::30]
    ]
    prompt_messages = [
        {
            "role": "user",
            "content": [prompt, *sampled_frames],
        },
    ]
    # NOTE(review): "gpt-4-vision-preview" appears to have been deprecated by
    # OpenAI - confirm the model is still served before relying on this call.
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content
119
+
120
+ #Generate frame description
121
def generate_frame_description(base64_frames):
    """Ask the vision model to describe what happens in each sampled frame.

    Every 30th entry of ``base64_frames`` is attached to a single user
    message; the text content of the first response choice is returned.
    """
    sampled_frames = [
        {"image": encoded, "resize": 428} for encoded in base64_frames[0::30]
    ]
    user_message = {
        "role": "user",
        "content": ["Describe what is happening in each frame."] + sampled_frames,
    }
    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[user_message],
        max_tokens=3000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
137
+
138
+
139
+
140
+ #Generate Category of Video
141
def generate_category(base64_frames):
    """Ask the vision model which category the video can be tagged with.

    Every 30th entry of ``base64_frames`` is attached to a single user
    message; the text content of the first response choice is returned.
    """
    content = ["What category can this video be tagged to?"]
    for encoded in base64_frames[0::30]:
        content.append({"image": encoded, "resize": 428})

    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[{"role": "user", "content": content}],
        max_tokens=3000,
    )
    return completion.choices[0].message.content
157
+
158
+
159
+
160
+
161
+ ########################
162
def display_frame_grid(base64_frames, frame_step=30, cols_per_row=3):
    """Show base64-encoded frames in a grid of Streamlit columns.

    Args:
        base64_frames: Base64-encoded images to display, in order.
        frame_step: Sampling stride the caller used to pick these frames
            from the video; used only to compute the caption, so the frame
            at position ``i`` is captioned ``Frame {i * frame_step + 1}``.
            Defaults to 30.
        cols_per_row: Number of columns in each grid row. Defaults to 3.
    """
    for row_start in range(0, len(base64_frames), cols_per_row):
        row_frames = base64_frames[row_start:row_start + cols_per_row]
        cols = st.columns(cols_per_row)
        # zip stops at the shorter sequence, so a partial last row simply
        # leaves its trailing columns empty.
        for offset, (col, frame) in enumerate(zip(cols, row_frames)):
            frame_idx = row_start + offset
            with col:
                st.image(
                    base64.b64decode(frame),
                    caption=f'Frame {frame_idx * frame_step + 1}',
                    width=200,
                )
173
+
174
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()