# Page-capture residue (not part of the program): "Spaces: Runtime error".
import base64
import hashlib
import os
import tempfile

import assemblyai as aai
import cv2
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI
# Star import kept last, as in the original, so name shadowing is unchanged.
from moviepy.editor import *
# Load environment variables (API keys are read from a local .env file if present).
load_dotenv()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

# The OpenAI client is configured per instance: assigning to the ``OpenAI``
# class attribute does not configure clients, so hand the key to the
# constructor explicitly instead.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def main():
    """Render the video moderation page.

    Lets the user upload a video, shows every 30th frame in a grid, and
    offers four actions (description, per-frame description, transcript,
    category). Each result is cached in ``st.session_state`` so it is only
    generated once per uploaded video and survives Streamlit reruns.
    """
    st.title('Insightly Video Content Moderation')

    # Video upload section
    uploaded_video = st.file_uploader('Upload a video', type=["mp4", "avi", "mov"])
    if uploaded_video is None:
        return

    # getvalue() returns the whole payload regardless of the stream position,
    # so a rerun never writes an empty temp file.
    video_bytes = uploaded_video.getvalue()

    # Results cached for a previously uploaded video must not be shown for a
    # new one: fingerprint the content and reset the cache when it changes.
    result_keys = ('description', 'frame_description', 'transcript', 'category')
    video_digest = hashlib.sha256(video_bytes).hexdigest()
    if st.session_state.get('video_digest') != video_digest:
        for key in result_keys:
            st.session_state.pop(key, None)
        st.session_state['video_digest'] = video_digest

    # Save the video to a temp file; keep the original extension in case a
    # downstream tool identifies the container format from the file name.
    suffix = os.path.splitext(uploaded_video.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tfile:
        tfile.write(video_bytes)
        video_file_path = tfile.name

    try:
        # Process the video and display frames in a grid layout
        base64_frames = video_to_base64_frames(video_file_path)
        if not base64_frames:
            st.warning("No frames could be read from the uploaded video.")
            return
        display_frame_grid(base64_frames[::30])  # Display every 30th frame in a 3-column grid

        st.write("Actions:")  # Header for the actions/buttons section

        # Creating four columns to align the buttons.
        # st.button() is evaluated first in each condition so the button is
        # always rendered, even when the result is already cached.
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            if st.button("Description") and 'description' not in st.session_state:
                st.session_state['description'] = generate_description(base64_frames)
        with col2:
            if st.button("Frame Description") and 'frame_description' not in st.session_state:
                st.session_state['frame_description'] = generate_frame_description(base64_frames)
        with col3:
            if st.button("Generate Transcript") and 'transcript' not in st.session_state:
                # Transcribe only on demand: doing it unconditionally would
                # repeat the remote call on every rerun of the script.
                transcript = aai.Transcriber().transcribe(video_file_path)
                if transcript.status == aai.TranscriptStatus.error:
                    st.error(f"Transcription failed: {transcript.error}")
                else:
                    st.session_state['transcript'] = transcript.text
        with col4:
            if st.button("Category of Video") and 'category' not in st.session_state:
                st.session_state['category'] = generate_category(base64_frames)
    finally:
        # The temp file was created with delete=False, so remove it ourselves.
        os.remove(video_file_path)

    # If any value exists in session state then display it
    sections = (
        ('description', "Video Description"),
        ('frame_description', "Frame Description"),
        ('transcript', "Video Transcript"),
        ('category', "Video Category"),
    )
    for key, heading in sections:
        if st.session_state.get(key):
            st.subheader(heading)
            st.write(st.session_state[key])
def video_to_base64_frames(video_file_path, frame_step=1):
    """Decode a video file into base64-encoded JPEG frames.

    Args:
        video_file_path: Path passed to ``cv2.VideoCapture``.
        frame_step: Keep every ``frame_step``-th frame, starting with the
            first one. The default of 1 keeps every frame.

    Returns:
        A list of base64 strings (UTF-8 text), one per kept frame, in
        playback order. Empty if no frame could be read.

    Raises:
        ValueError: If ``frame_step`` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError("frame_step must be a positive integer")

    video = cv2.VideoCapture(video_file_path)
    base64_frames = []
    try:
        frame_index = 0
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            if frame_index % frame_step == 0:
                _, buffer = cv2.imencode('.jpg', frame)
                base64_frames.append(base64.b64encode(buffer).decode('utf-8'))
            frame_index += 1
    finally:
        # Release the capture handle even if encoding raises part-way through.
        video.release()
    return base64_frames
######################################### | |
#Generate Video description | |
def generate_description(base64_frames):
    """Ask the vision model for an overall description of the video.

    Sends the instruction text together with every 30th entry of
    ``base64_frames`` (each wrapped as ``{"image": ..., "resize": 428}``) in a
    single user message.

    Args:
        base64_frames: Sequence of base64-encoded frame images.

    Returns:
        The text content of the first completion choice.
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside one literal, which fused the two sentences (or embedded the
    # source indentation) in the prompt.
    instruction = (
        "1. Generate a description for this sequence of video frames in about 90 words. "
        "Return the following : 1. List of objects in the video 2. Any restrictive content or sensitive content and if so which frame."
    )
    sampled_frames = [{"image": frame, "resize": 428} for frame in base64_frames[0::30]]
    prompt_messages = [
        {
            "role": "user",
            "content": [instruction, *sampled_frames],
        },
    ]
    # NOTE(review): "gpt-4-vision-preview" may no longer be served by OpenAI — confirm.
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content
#Generate frame description | |
def generate_frame_description(base64_frames):
    """Ask the vision model what is happening in each sampled frame.

    Every 30th entry of ``base64_frames`` is attached to the request as
    ``{"image": ..., "resize": 428}`` after the instruction text.

    Args:
        base64_frames: Sequence of base64-encoded frame images.

    Returns:
        The text content of the first completion choice.
    """
    image_parts = [
        {"image": encoded, "resize": 428}
        for encoded in base64_frames[::30]
    ]
    user_message = {
        "role": "user",
        "content": ["Describe what is happening in each frame.", *image_parts],
    }
    # NOTE(review): "gpt-4-vision-preview" may no longer be served by OpenAI — confirm.
    completion = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[user_message],
        max_tokens=3000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
#Generate Category of Video | |
def generate_category(base64_frames):
    """Ask the vision model which category the video can be tagged with.

    The request holds the question followed by every 30th entry of
    ``base64_frames``, each wrapped as ``{"image": ..., "resize": 428}``.

    Args:
        base64_frames: Sequence of base64-encoded frame images.

    Returns:
        The text content of the first completion choice.
    """
    # Build the message content: the question first, then the sampled frames.
    content = ["What category can this video be tagged to?"]
    for encoded in base64_frames[::30]:
        content.append({"image": encoded, "resize": 428})

    # NOTE(review): "gpt-4-vision-preview" may no longer be served by OpenAI — confirm.
    reply = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[{"role": "user", "content": content}],
        max_tokens=3000,
    )
    return reply.choices[0].message.content
######################## | |
def display_frame_grid(base64_frames, cols_per_row=3, frame_step=30):
    """Show base64-encoded frames as a grid of captioned images.

    Args:
        base64_frames: The frames to display, already sampled by the caller.
        cols_per_row: Number of Streamlit columns per grid row.
        frame_step: Sampling interval the caller used to pick the frames. It
            is only used to label each image with its frame number
            (``index * frame_step + 1``); the default matches the
            every-30th-frame sampling the caption previously hard-coded.
    """
    for row_start in range(0, len(base64_frames), cols_per_row):
        row_frames = base64_frames[row_start:row_start + cols_per_row]
        # A full set of columns is created for every row so a short last row
        # keeps the same image alignment as the rows above it.
        cols = st.columns(cols_per_row)
        for offset, (col, frame) in enumerate(zip(cols, row_frames)):
            frame_idx = row_start + offset
            with col:
                st.image(
                    base64.b64decode(frame),
                    caption=f'Frame {frame_idx * frame_step + 1}',
                    width=200,
                )
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()