Spaces: KarthickAdopleAI (Space status: runtime error)
Commit e21cd35 (parent: 3647674), committed by KarthickAdopleAI: "Update app.py"

app.py CHANGED
@@ -4,7 +4,6 @@ from huggingface_hub import InferenceClient
 import os
 import ffmpeg
 from typing import List
-from moviepy.editor import VideoFileClip
 import nltk
 from gtts import gTTS
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -17,8 +16,9 @@ import os
 from pydub import AudioSegment
 import speech_recognition as sr
 import torchaudio
-from pydub.silence import split_on_silence
 from speechbrain.inference.classifiers import EncoderClassifier
+from pydub.silence import split_on_silence
+from moviepy.editor import VideoFileClip
 nltk.download('punkt')
 nltk.download('stopwords')
 
@@ -37,8 +37,9 @@ class VideoAnalytics:
         """
         # Initialize AzureOpenAI client
         self.client = AzureOpenAI()
-
-
+
+        hf_token = os.getenv("HF_TOKEN")
+        self.mistral_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
 
         # Initialize transcribed text variable
         self.transcribed_text = ""
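The constructor now builds a Hugging Face InferenceClient for Mixtral alongside the AzureOpenAI client. For context, a minimal standalone sketch of that client being exercised on its own (illustrative prompt; assumes HF_TOKEN is set and huggingface_hub is installed):

import os
from huggingface_hub import InferenceClient

# Same model id and token source as in this commit; the prompt is only an example.
mistral_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=os.getenv("HF_TOKEN"))
reply = mistral_client.text_generation("<s>[INST] Reply with one short sentence. [/INST]", max_new_tokens=50)
print(reply)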
@@ -74,18 +75,52 @@ class VideoAnalytics:
         try:
             # Load the MP3 file
             audio = AudioSegment.from_mp3(mp3_file)
-
+
             # Export the audio to WAV format
             audio.export(wav_file, format="wav")
-
+
             logging.info(f"MP3 file '{mp3_file}' converted to WAV successfully: {wav_file}")
-
+
             return wav_file
         except Exception as e:
             # Log the exception and raise it further
             logging.error(f"Error occurred while converting MP3 to WAV: {e}")
             raise e
 
+    def split_audio(self, input_file: str) -> list:
+        """
+        Split an audio file into segments of a specified length.
+
+        Args:
+            input_file (str): Path to the input audio file.
+
+        Returns:
+            list: List of audio segments.
+        """
+        try:
+            # Load the audio file
+            audio = AudioSegment.from_file(input_file)
+
+            # Define segment length in milliseconds (1 minute = 60,000 milliseconds)
+            segment_length = 60000
+
+            # Split the audio into segments
+            segments = []
+            for i, start_time in enumerate(range(0, len(audio), segment_length)):
+                # Calculate end time for current segment
+                end_time = start_time + segment_length if start_time + segment_length < len(audio) else len(audio)
+
+                # Extract segment
+                segment = audio[start_time:end_time]
+
+                # Append segment to list
+                segments.append(segment)
+
+            return segments
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return []
+
     # Function to recognize speech in the audio file
     def transcribe_audio(self, path: str, lang: str):
         """Transcribe speech from an audio file."""
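The re-added split_audio cuts the audio into fixed 60-second pydub segments. A minimal standalone sketch of the same slicing outside the class (assumes pydub and ffmpeg are available; "input.mp3" is a placeholder path):

from pydub import AudioSegment

audio = AudioSegment.from_file("input.mp3")          # placeholder input path
segment_length = 60_000                              # 60 seconds, in milliseconds
segments = [audio[start:start + segment_length]      # pydub slices by milliseconds
            for start in range(0, len(audio), segment_length)]
for idx, segment in enumerate(segments):
    segment.export(f"segment_{idx}.wav", format="wav")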
@@ -108,51 +143,30 @@ class VideoAnalytics:
             sound = AudioSegment.from_file(path)
             chunks = split_on_silence(sound, min_silence_len=500, silence_thresh=sound.dBFS-14, keep_silence=500)
             folder_name = "audio-chunks"
-
+
             if not os.path.isdir(folder_name):
                 os.mkdir(folder_name)
-
+
             whole_text = ""
-
+
             for i, audio_chunk in enumerate(chunks, start=1):
                 chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
                 audio_chunk.export(chunk_filename, format="wav")
-
+
                 text = self.transcribe_audio(chunk_filename, lang)
-
+
                 if text:
                     text = f"{text.capitalize()}. "
                     logging.info(f"Transcribed {chunk_filename}: {text}")
                     whole_text += text
                 else:
                     logging.warning(f"No speech recognized in {chunk_filename}")
-
+
             return whole_text
         except Exception as e:
             logging.error(f"Error processing audio: {e}")
             return ""
-
-    def split_audio(self, input_file):
-        # Load the audio file
-        audio = AudioSegment.from_file(input_file)
-
-        # Define segment length in milliseconds (5 minutes = 300,000 milliseconds)
-        segment_length = 60000
-
-        # Split the audio into segments
-        segments = []
-        for i, start_time in enumerate(range(0, len(audio), segment_length)):
-            # Calculate end time for current segment
-            end_time = start_time + segment_length if start_time + segment_length < len(audio) else len(audio)
-
-            # Extract segment
-            segment = audio[start_time:end_time]
-
-            # Append segment to list
-            segments.append(segment)
 
-        return segments
-
     def transcribe_video(self, vid: str) -> str:
         """
         Transcribe the audio of the video.
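The silence-split chunks are fed one at a time to transcribe_audio, whose body is not shown in this hunk. Judging from the speech_recognition import, the per-chunk step is roughly the following sketch (an assumption, with a placeholder chunk path and language code):

import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile("audio-chunks/chunk1.wav") as source:   # placeholder chunk path
    audio_data = recognizer.record(source)
try:
    text = recognizer.recognize_google(audio_data, language="en-US")
    print(text)
except sr.UnknownValueError:
    print("No speech recognized in this chunk")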
@@ -191,68 +205,133 @@ class VideoAnalytics:
             logging.error(f"Error transcribing video: {e}")
             return ""
 
-    def generate_video_summary(self) -> str:
+    def generate_video_summary(self, model) -> str:
         """
-        Generate a summary of the
+        Generate a summary of the transcribed video.
         Returns:
             str: Generated summary.
         """
         try:
-
-
-
-
-            In two format of Outputs given below:
-            Abstractive Summary:
-            Extractive Summary:
-
-
-
-
-            response = self.client.chat.completions.create(
-                model="ChatGPT",
-                messages=conversation,
-                temperature=0,
-                max_tokens=1000
-            )
-            # Get the generated summary message
-            message = response.choices[0].message.content
-            return message
+            if model == "OpenAI":
+                # Define a conversation between system and user
+                conversation = [
+                    {"role": "system", "content": "You are a Summarizer"},
+                    {"role": "user", "content": f"""Summarize the following text delimited by triple backticks. Output must be in English. Give me a detailed summary. The extractive summary should extract sentences from the given text to return as the summary; the abstractive summary should describe what the given text is about. Don't make bullet points; write like a passage.
+                    In two format of Outputs given below:
+                    Abstractive Summary:
+                    Extractive Summary:
+                    ```{self.english_text}```
+                    """}
+                ]
+                # Generate completion using ChatGPT model
+                response = self.client.chat.completions.create(
+                    model="ChatGPT",
+                    messages=conversation,
+                    temperature=0,
+                    max_tokens=1000
+                )
+                # Get the generated summary message
+                message = response.choices[0].message.content
+                return message
+
+            elif model == "Mixtral":
+                task = "summary"
+                # Generate answer using Mixtral model
+                prompt = f"""<s>[INST] Summarize the following text delimited by triple backticks. Output must be in English. Give me a detailed summary. The extractive summary should extract sentences from the given text to return as the summary; the abstractive summary should describe what the given text is about. Don't make bullet points; write like a passage.
+                In two format of Outputs given below:
+                Abstractive Summary:
+                Extractive Summary:
+                ```data:{self.english_text}```[/INST]"""
+                result = self.generate(prompt)
+                return result
+
         except Exception as e:
             logging.error(f"Error generating video summary: {e}")
             return ""
 
 
-    def generate_topics(self) -> str:
+    def generate_topics(self, model) -> str:
         """
-        Generate topics from the
+        Generate topics from the transcribed video.
         Returns:
             str: Generated topics.
         """
         try:
-
-
-
-
-            list out the topics:
-            Topics:
-
-
-
-
-            response = self.client.chat.completions.create(
-                model="ChatGPT",
-                messages=conversation,
-                temperature=0,
-                max_tokens=1000
-            )
-            # Get the generated topics message
-            message = response.choices[0].message.content
-            return message
+            if model == "OpenAI":
+                # Define a conversation between system and user
+                conversation = [
+                    {"role": "system", "content": "You are a Topic Generator"},
+                    {"role": "user", "content": f"""Generate single Topics from the following text delimited by triple backticks; don't make sentences for topic generation. Output must be in English.
+                    list out the topics:
+                    Topics:
+                    ```{self.english_text}```
+                    """}
+                ]
+                # Generate completion using ChatGPT model
+                response = self.client.chat.completions.create(
+                    model="ChatGPT",
+                    messages=conversation,
+                    temperature=0,
+                    max_tokens=1000
+                )
+                # Get the generated topics message
+                message = response.choices[0].message.content
+                return message
+            elif model == "Mixtral":
+                task = "topics"
+                # Generate answer using Mixtral model
+                prompt = f"""<s>[INST] Generate single Topics from the following text delimited by triple backticks; don't make sentences for topic generation. Output must be in English.
+                list out the topics:
+                Topics:
+                ```data:{self.english_text}```[/INST]"""
+                result = self.generate(prompt)
+                return result
+
         except Exception as e:
             logging.error(f"Error generating topics: {e}")
             return ""
 
+    def extract_video_important_sentence(self, model) -> str:
+        """
+        Extract important sentences from the transcribed video.
+        Returns:
+            str: Extracted important sentences.
+        """
+        try:
+            if model == "OpenAI":
+                # Define a conversation between system and user
+                conversation = [
+                    {"role": "system", "content": "You are a Sentence Extracter"},
+                    {"role": "user", "content": f"""Extract the most important sentences from the text. The text is given in triple backticks.
+                    list out the sentences:
+                    ```{self.english_text}```
+                    """}
+                ]
+                # Generate completion using ChatGPT model
+                response = self.client.chat.completions.create(
+                    model="ChatGPT",
+                    messages=conversation,
+                    temperature=0,
+                    max_tokens=1000
+                )
+                # Get the generated sentences message
+                message = response.choices[0].message.content
+                return message
+            elif model == "Mixtral":
+                task = "topics"
+                # Generate answer using Mixtral model
+                prompt = f"""<s>[INST] Extract the most important sentences from the text. The text is given in triple backticks.
+                list out the sentences:
+                ```{self.english_text}```[/INST]"""
+                result = self.generate(prompt)
+                return result
+
+        except Exception as e:
+            logging.error(f"Error Extracting Important Sentence: {e}")
+            return ""
+
+
+
     def translation(self) -> str:
         """
         translation from the transcribed video.
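All three generation methods now take the selected model name. A minimal sketch of how they could be called once credentials are configured (assumes the AzureOpenAI environment variables and HF_TOKEN are set; the transcript string is a placeholder, normally filled by transcribe_video):

va = VideoAnalytics()
va.english_text = "Placeholder transcript of the video."   # illustrative; set by the pipeline in practice
summary = va.generate_video_summary("OpenAI")
topics = va.generate_topics("Mixtral")
sentences = va.extract_video_important_sentence("Mixtral")
print(summary, topics, sentences, sep="\n\n")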
@@ -299,7 +378,7 @@ class VideoAnalytics:
         return prompt+prompt1
 
 
-    def generate(self,
+    def generate(self, task: str, temperature=0.9, max_new_tokens=5000, top_p=0.95,
                  repetition_penalty=1.0) -> str:
         """
         Generates text based on the prompt and transcribed text.
@@ -328,11 +407,8 @@ class VideoAnalytics:
             seed=42,
         )
 
-        # Format the prompt
-        formatted_prompt = self.format_prompt(prompt, transcribed_text)
-
         # Generate text using the mistral client
-        stream = self.mistral_client.text_generation(
+        stream = self.mistral_client.text_generation(task, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""
         # Concatenate generated text
         for response in stream:
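For context, a standalone sketch of the streaming text_generation call that generate now makes, assuming the huggingface_hub InferenceClient API and that HF_TOKEN is set (prompt and generation values are illustrative):

import os
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=os.getenv("HF_TOKEN"))
generate_kwargs = dict(temperature=0.9, max_new_tokens=256, top_p=0.95,
                       repetition_penalty=1.0, do_sample=True, seed=42)
stream = client.text_generation("<s>[INST] Summarize: the sky is blue. [/INST]",
                                **generate_kwargs, stream=True, details=True, return_full_text=False)
output = ""
for response in stream:
    # each streamed item carries one generated token
    output += response.token.text
print(output)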
@@ -354,7 +430,7 @@ class VideoAnalytics:
         try:
             if model == "OpenAI":
                 template = """you are the universal language expert .your task is analyze the given text and user ask any question about given text answer to the user question.otherwise reply i don't know.
-
+                english_text:{text}
                 user_question:{question}"""
 
                 prompt = PromptTemplate(template=template, input_variables=["text","question"])
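The OpenAI question-answering path wraps this template in a LangChain PromptTemplate. A minimal sketch of that pattern (the import path and sample values are assumptions, since the rest of the file's imports are not shown in this diff):

from langchain.prompts import PromptTemplate

template = """you are the universal language expert. Analyze the given text and answer the user's question from it; otherwise reply "i don't know".
english_text:{text}
user_question:{question}"""
prompt = PromptTemplate(template=template, input_variables=["text", "question"])
print(prompt.format(text="Placeholder transcript.", question="What is the video about?"))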
@@ -372,46 +448,6 @@ class VideoAnalytics:
             logging.error(f"Error in video question answering: {e}")
             return "An error occurred during video question answering."
 
-    def extract_video_important_sentence(self) -> str:
-        """
-        Extract important sentences from the transcribed video.
-        Returns:
-            str: Extracted important sentences.
-        """
-        try:
-
-            # Tokenize the sentences
-            sentences = nltk.sent_tokenize(self.english_text)
-
-            # Initialize TF-IDF vectorizer
-            tfidf_vectorizer = TfidfVectorizer()
-
-            # Fit the vectorizer on the summary sentences
-            tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
-
-            # Calculate sentence scores based on TF-IDF values
-            sentence_scores = tfidf_matrix.sum(axis=1)
-
-            # Create a list of (score, sentence) tuples
-            sentence_rankings = [(score, sentence) for score, sentence in zip(sentence_scores, sentences)]
-
-            # Sort sentences by score in descending order
-            sentence_rankings.sort(reverse=True)
-
-            # Set a threshold for selecting sentences
-            threshold = 2.5  # Adjust as needed
-
-            # Select sentences with scores above the threshold
-            selected_sentences = [sentence for score, sentence in sentence_rankings if score >= threshold]
-
-            # Join selected sentences to form the summary
-            summary = '\n\n'.join(selected_sentences)
-
-            return summary
-
-        except Exception as e:
-            logging.error(f"Error extracting important sentences: {e}")
-            return ""
 
     def write_text_files(self, text: str, filename: str) -> None:
         """
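The deleted method scored sentences with TF-IDF; this commit replaces it with the LLM-based extractor added above. For reference, a standalone sketch of that TF-IDF ranking (sample text and threshold are illustrative):

import nltk
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('punkt')
text = ("Video analytics turns long recordings into short summaries. "
        "The pipeline transcribes audio, then ranks sentences by TF-IDF mass. "
        "Short filler remarks score low.")
sentences = nltk.sent_tokenize(text)
tfidf_matrix = TfidfVectorizer().fit_transform(sentences)
scores = tfidf_matrix.sum(axis=1).A1           # one TF-IDF mass per sentence
ranked = sorted(zip(scores, sentences), reverse=True)
threshold = 1.0                                # illustrative; the removed code used 2.5
print("\n\n".join(s for score, s in ranked if score >= threshold))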
@@ -481,7 +517,7 @@ class VideoAnalytics:
             logging.error(f"Error occurred while saving audio: {e}")
             raise e
 
-    def main(self, video: str = None, input_path: str = None) -> tuple:
+    def main(self, video: str = None, input_path: str = None, model: str = None) -> tuple:
         """
         Perform video analytics.
         Args:
@@ -511,13 +547,13 @@ class VideoAnalytics:
             else:
                 return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","","",None,None,None
             # Generate summary, important sentences, and topics
-            summary = self.generate_video_summary()
+            summary = self.generate_video_summary(model)
             self.write_text_files(summary,"Summary")
             summary_voice = self.save_audio_with_gtts(summary,"summary.mp3")
-            important_sentences = self.extract_video_important_sentence()
+            important_sentences = self.extract_video_important_sentence(model)
             self.write_text_files(important_sentences,"Important_Sentence")
             important_sentences_voice = self.save_audio_with_gtts(important_sentences,"important_sentences.mp3")
-            topics = self.generate_topics()
+            topics = self.generate_topics(model)
             self.write_text_files(topics,"Topics")
             topics_voice = self.save_audio_with_gtts(topics,"topics.mp3")
 
@@ -533,7 +569,10 @@ class VideoAnalytics:
         with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
             gr.HTML("""<center><h1>Video Analytics</h1></center>""")
             with gr.Row():
-
+                with gr.Column(scale=0.70):
+                    yt_link = gr.Textbox(label="Youtube Link", placeholder="https://www.youtube.com/watch?v=")
+                with gr.Column(scale=0.30):
+                    model_selection = gr.Dropdown(["OpenAI", "Mixtral"], label="Model", value="model")
             with gr.Row():
                 video = gr.Video(sources="upload",height=200,width=300)
             with gr.Row():
@@ -567,9 +606,9 @@ class VideoAnalytics:
                 model = gr.Dropdown(["OpenAI", "Mixtral"],show_label=False,value="model")
             with gr.Row():
                 result = gr.Textbox(label='Answer',lines=10)
-            submit_btn.click(self.main,[video,yt_link],[summary,Important_Sentences,Topics,summary_audio,important_sentence_audio,topics_audio])
+            submit_btn.click(self.main,[video,yt_link,model_selection],[summary,Important_Sentences,Topics,summary_audio,important_sentence_audio,topics_audio])
             question.submit(self.video_qa,[question,model],result)
-        demo.launch()
+        demo.launch(debug=True)
 
 if __name__ == "__main__":
     video_analytics = VideoAnalytics()
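The new model_selection dropdown is simply passed as an extra input to the click handler. A minimal standalone sketch of that wiring pattern in Gradio (the component set and handler are illustrative, not the app's full UI):

import gradio as gr

def run(video_path, yt_link, model):
    # stand-in for VideoAnalytics.main
    return f"model={model}, link={yt_link}, video={video_path}"

with gr.Blocks() as demo:
    yt_link = gr.Textbox(label="Youtube Link")
    model_selection = gr.Dropdown(["OpenAI", "Mixtral"], label="Model", value="OpenAI")
    video = gr.Video()
    summary = gr.Textbox(label="Summary", lines=10)
    submit_btn = gr.Button("Submit")
    submit_btn.click(run, [video, yt_link, model_selection], summary)

demo.launch(debug=True)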