VideoAnalytics / app.py
KarthickAdopleAI's picture
Update app.py
1b1cfc3 verified
raw
history blame
10.8 kB
from openai import AzureOpenAI
import os
import ffmpeg
from typing import List
from moviepy.editor import VideoFileClip
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
import gradio as gr
from pytube import YouTube
import requests
import logging
# Fetch the NLTK resources this module relies on at start-up.
# nltk.sent_tokenize loads 'punkt_tab' on recent NLTK releases (3.9+), where it
# replaced the pickled 'punkt' models; 'punkt' is still fetched so older
# releases keep working.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
class VideoAnalytics:
    """
    Class for performing analytics on videos including transcription, summarization,
    topic generation, and extraction of important sentences.

    The transcript of the most recently processed video is kept in
    ``self.transcribed_text`` and is the input of the summary, topic and
    important-sentence methods.
    """

    # Value passed as ``model`` to the chat-completions endpoint.
    CHAT_MODEL = "ChatGPT"
    # File the extracted audio track is written to before it is uploaded.
    AUDIO_FILE = "output_audio.mp3"

    def __init__(self):
        """
        Initialize the VideoAnalytics object.

        The Hugging Face API token is read from the ``HF_TOKEN`` environment
        variable. The AzureOpenAI client is created without arguments, so its
        configuration has to come from whatever defaults the client library
        applies.
        """
        # Configure logging first: emitting a record through the root logger
        # before basicConfig() would install a default handler and turn this
        # call into a no-op.
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
        # Initialize AzureOpenAI client
        self.client = AzureOpenAI()
        # Transcript of the most recently processed video
        self.transcribed_text = ""
        # API URL for accessing the Hugging Face model
        self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
        # os.environ.get returns None (instead of raising) when the variable is unset
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            self.headers = {"Authorization": f"Bearer {hf_token}"}
        else:
            logging.warning("HF_TOKEN is not set; transcription requests will be sent without a token")
            self.headers = {}

    def _chat(self, system_prompt: str, user_prompt: str) -> str:
        """Send a system/user message pair to the chat model and return the reply text."""
        response = self.client.chat.completions.create(
            model=self.CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,
            max_tokens=1000,
        )
        # The reply content may be missing; normalise that to an empty string.
        return response.choices[0].message.content or ""

    def transcribe_video(self, vid: str) -> str:
        """
        Transcribe the audio of the video.

        The audio track is written to ``AUDIO_FILE`` and posted to the
        Hugging Face model at ``self.API_URL``. On success the transcript is
        also stored in ``self.transcribed_text``.

        Args:
            vid (str): Path to the video file.
        Returns:
            str: Transcribed text, or an empty string if transcription failed.
        """
        # Drop the transcript of any previous run so that a failure below
        # cannot leave stale text behind for the other methods to pick up.
        self.transcribed_text = ""
        if not vid:
            logging.error("Error transcribing video: no video path was provided")
            return ""
        try:
            # Load the video file and extract audio
            video = VideoFileClip(vid)
            try:
                audio = video.audio
                if audio is None:
                    logging.error("Error transcribing video: the video has no audio track")
                    return ""
                audio.write_audiofile(self.AUDIO_FILE)
            finally:
                # Release the reader resources held by the clip.
                video.close()
            # Send audio data to the Hugging Face model for transcription
            with open(self.AUDIO_FILE, "rb") as audio_file:
                response = requests.post(self.API_URL, headers=self.headers, data=audio_file)
            output = response.json()
            # Anything without a "text" field is treated as a failed
            # transcription and logged in full, rather than surfacing as a
            # bare KeyError.
            if not isinstance(output, dict) or "text" not in output:
                logging.error("Error transcribing video: unexpected API response: %s", output)
                return ""
            self.transcribed_text = output["text"]
            return self.transcribed_text
        except Exception as e:
            logging.error("Error transcribing video: %s", e)
            return ""

    def generate_video_summary(self) -> str:
        """
        Generate a summary of the transcribed video.

        Returns:
            str: Generated summary, or an empty string on error.
        """
        try:
            prompt = (
                "summarize the following text delimited by triple backticks.\n"
                "In two format of Outputs given below:\n"
                "Abstractive Summary:\n"
                "Extractive Summary:\n"
                f"```{self.transcribed_text}```\n"
            )
            return self._chat("You are a Summarizer", prompt)
        except Exception as e:
            logging.error("Error generating video summary: %s", e)
            return ""

    def generate_topics(self) -> str:
        """
        Generate topics from the transcribed video.

        Returns:
            str: Generated topics, or an empty string on error.
        """
        try:
            prompt = (
                "generate single Topics from the following text don't make sentence "
                "for topic generation,delimited by triple backticks.\n"
                "list out the topics:\n"
                "Topics:\n"
                f"```{self.transcribed_text}```\n"
            )
            return self._chat("You are a Topic Generator", prompt)
        except Exception as e:
            logging.error("Error generating topics: %s", e)
            return ""

    def extract_video_important_sentence(self, threshold: float = 2) -> str:
        """
        Extract important sentences from the transcribed video.

        Each sentence is scored by the sum of its TF-IDF term weights;
        sentences scoring at least ``threshold`` are returned, highest
        score first.

        Args:
            threshold (float): Minimum score a sentence needs to be selected.
        Returns:
            str: Selected sentences separated by blank lines, or an empty
            string if there is no transcript or an error occurred.
        """
        # Nothing to rank: avoid fitting the vectorizer on an empty corpus.
        if not self.transcribed_text or not self.transcribed_text.strip():
            return ""
        try:
            import numpy as np

            # Tokenize the sentences
            sentences = nltk.sent_tokenize(self.transcribed_text)
            # Fit the TF-IDF vectorizer on the transcript sentences
            tfidf_matrix = TfidfVectorizer().fit_transform(sentences)
            # Row sums of a sparse matrix come back two-dimensional; flatten
            # them to plain floats so sorting and comparison act on scalars.
            scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel().tolist()
            # Sort (score, sentence) pairs by score in descending order
            ranked = sorted(zip(scores, sentences), reverse=True)
            selected = [sentence for score, sentence in ranked if score >= threshold]
            return '\n\n'.join(selected)
        except Exception as e:
            logging.error("Error extracting important sentences: %s", e)
            return ""

    def write_text_files(self, text: str, filename: str) -> None:
        """
        Write text to ``<filename>.txt``.

        Args:
            text (str): Text to be written to the file.
            filename (str): Name of the file, without the ``.txt`` extension.
        """
        try:
            file_path = f"{filename}.txt"
            # Explicit UTF-8 so non-ASCII text does not depend on the
            # platform's default encoding.
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(text)
        except Exception as e:
            logging.error("Error writing text to file: %s", e)

    def Download(self, link: str) -> str:
        """
        Download a video from YouTube.

        Args:
            link (str): YouTube video link.
        Returns:
            str: Path to the downloaded video file, or an empty string if
            the download failed.
        """
        try:
            # Get the highest resolution stream for the provided link
            stream = YouTube(link).streams.get_highest_resolution()
            if stream is None:
                logging.error("Error downloading video: no downloadable stream found")
                return ""
            file_name = stream.download()
            logging.info("Download is completed successfully")
            return file_name
        except Exception as e:
            logging.error("Error downloading video: %s", e)
            return ""

    def main(self, video: str = None, input_path: str = None) -> tuple:
        """
        Perform video analytics.

        ``input_path`` takes precedence: when it is given, the video is
        downloaded from it; otherwise the uploaded ``video`` is used. The
        results are also written to ``Summary.txt``,
        ``Important_Sentence.txt`` and ``Topics.txt``.

        Args:
            video (str): Path to the video file.
            input_path (str): YouTube link of the video.
        Returns:
            tuple: Summary, important sentences, and topics. All three are
            empty strings when there is nothing to analyze or an error occurred.
        """
        try:
            # Never analyze the transcript left over from a previous call.
            self.transcribed_text = ""
            source = self.Download(input_path) if input_path else video
            if not source:
                logging.error("Error in main function: no video available to analyze")
                return "", "", ""
            text = self.transcribe_video(source)
            if not text or not text.strip():
                # transcribe_video has already logged the reason.
                return "", "", ""
            # Generate summary, important sentences, and topics
            summary = self.generate_video_summary()
            self.write_text_files(summary, "Summary")
            important_sentences = self.extract_video_important_sentence()
            self.write_text_files(important_sentences, "Important_Sentence")
            topics = self.generate_topics()
            self.write_text_files(topics, "Topics")
            return summary, important_sentences, topics
        except Exception as e:
            logging.error("Error in main function: %s", e)
            return "", "", ""

    def gradio_interface(self):
        """Build the Gradio UI, wire the submit button to ``main`` and launch it."""
        with gr.Blocks(css="style.css", theme=gr.themes.Soft()) as demo:
            gr.HTML("""<center><h1>Video Analytics</h1></center>""")
            with gr.Row():
                yt_link = gr.Textbox(label= "Youtube Link", placeholder="https://www.youtube.com/watch?v=")
            with gr.Row():
                video = gr.Video(sources="upload", height=200, width=300)
            with gr.Row():
                submit_btn = gr.Button(value="Submit")
            with gr.Tab("Summary"):
                with gr.Row():
                    summary = gr.Textbox(show_label=False, lines=10)
                with gr.Row():
                    gr.DownloadButton(label="Download", value="Summary.txt", visible=True, size='lg', elem_classes="download_button")
            with gr.Tab("Important Sentences"):
                with gr.Row():
                    Important_Sentences = gr.Textbox(show_label=False, lines=10)
                with gr.Row():
                    gr.DownloadButton(label="Download", value="Important_Sentence.txt", visible=True, size='lg', elem_classes="download_button")
            with gr.Tab("Topics"):
                with gr.Row():
                    Topics = gr.Textbox(show_label=False, lines=10)
                with gr.Row():
                    gr.DownloadButton(label="Download", value="Topics.txt", visible=True, size='lg', elem_classes="download_button")
            # Inputs are passed positionally, matching main(video, input_path).
            submit_btn.click(self.main, [video, yt_link], [summary, Important_Sentences, Topics])
        demo.launch()
if __name__ == "__main__":
    # Run as a script: build the analytics app and serve its Gradio UI.
    VideoAnalytics().gradio_interface()