Spaces:
Runtime error
Runtime error
KarthickAdopleAI
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
from openai import AzureOpenAI
|
|
|
2 |
import os
|
3 |
import ffmpeg
|
4 |
from typing import List
|
5 |
from moviepy.editor import VideoFileClip
|
6 |
import nltk
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
8 |
import gradio as gr
|
9 |
from pytube import YouTube
|
10 |
import requests
|
@@ -14,6 +16,7 @@ nltk.download('punkt')
|
|
14 |
nltk.download('stopwords')
|
15 |
|
16 |
|
|
|
17 |
class VideoAnalytics:
|
18 |
"""
|
19 |
Class for performing analytics on videos including transcription, summarization, topic generation,
|
@@ -30,6 +33,8 @@ class VideoAnalytics:
|
|
30 |
# Initialize AzureOpenAI client
|
31 |
self.client = AzureOpenAI()
|
32 |
|
|
|
|
|
33 |
# Initialize transcribed text variable
|
34 |
self.transcribed_text = ""
|
35 |
|
@@ -37,10 +42,18 @@ class VideoAnalytics:
|
|
37 |
self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
|
38 |
|
39 |
# Placeholder for Hugging Face API token
|
40 |
-
hf_token =
|
41 |
|
42 |
# Set headers for API requests with Hugging Face token
|
43 |
-
self.headers = {"Authorization": f"Bearer {hf_token}"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Configure logging settings
|
46 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -73,6 +86,8 @@ class VideoAnalytics:
|
|
73 |
output = query(audio_file)
|
74 |
# Update the transcribed_text attribute with the transcription result
|
75 |
self.transcribed_text = output["text"]
|
|
|
|
|
76 |
# Return the transcribed text
|
77 |
return output["text"]
|
78 |
|
@@ -91,11 +106,11 @@ class VideoAnalytics:
|
|
91 |
# Define a conversation between system and user
|
92 |
conversation = [
|
93 |
{"role": "system", "content": "You are a Summarizer"},
|
94 |
-
{"role": "user", "content": f"""summarize the following text delimited by triple backticks.
|
95 |
In two format of Outputs given below:
|
96 |
Abstractive Summary:
|
97 |
Extractive Summary:
|
98 |
-
```{self.
|
99 |
"""}
|
100 |
]
|
101 |
# Generate completion using ChatGPT model
|
@@ -124,9 +139,38 @@ class VideoAnalytics:
|
|
124 |
# Define a conversation between system and user
|
125 |
conversation = [
|
126 |
{"role": "system", "content": "You are a Topic Generator"},
|
127 |
-
{"role": "user", "content": f"""generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.
|
128 |
list out the topics:
|
129 |
Topics:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
```{self.transcribed_text}```
|
131 |
"""}
|
132 |
]
|
@@ -144,6 +188,103 @@ class VideoAnalytics:
|
|
144 |
logging.error(f"Error generating topics: {e}")
|
145 |
return ""
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
def extract_video_important_sentence(self) -> str:
|
148 |
"""
|
149 |
Extract important sentences from the transcribed video.
|
@@ -152,8 +293,9 @@ class VideoAnalytics:
|
|
152 |
str: Extracted important sentences.
|
153 |
"""
|
154 |
try:
|
|
|
155 |
# Tokenize the sentences
|
156 |
-
sentences = nltk.sent_tokenize(self.
|
157 |
|
158 |
# Initialize TF-IDF vectorizer
|
159 |
tfidf_vectorizer = TfidfVectorizer()
|
@@ -171,7 +313,7 @@ class VideoAnalytics:
|
|
171 |
sentence_rankings.sort(reverse=True)
|
172 |
|
173 |
# Set a threshold for selecting sentences
|
174 |
-
threshold = 2 # Adjust as needed
|
175 |
|
176 |
# Select sentences with scores above the threshold
|
177 |
selected_sentences = [sentence for score, sentence in sentence_rankings if score >= threshold]
|
@@ -232,7 +374,6 @@ class VideoAnalytics:
|
|
232 |
logging.error(f"Error downloading video: {e}")
|
233 |
return ""
|
234 |
|
235 |
-
|
236 |
def main(self, video: str = None, input_path: str = None) -> tuple:
|
237 |
"""
|
238 |
Perform video analytics.
|
@@ -245,24 +386,31 @@ class VideoAnalytics:
|
|
245 |
tuple: Summary, important sentences, and topics.
|
246 |
"""
|
247 |
try:
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
input_path
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
except Exception as e:
|
268 |
# Log any errors that occur during video analytics
|
@@ -293,7 +441,16 @@ class VideoAnalytics:
|
|
293 |
Topics = gr.Textbox(show_label=False,lines=10)
|
294 |
with gr.Row():
|
295 |
topics_download = gr.DownloadButton(label="Download",value="Topics.txt",visible=True,size='lg',elem_classes="download_button")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
submit_btn.click(self.main,[video,yt_link],[summary,Important_Sentences,Topics])
|
|
|
297 |
demo.launch()
|
298 |
|
299 |
if __name__ == "__main__":
|
|
|
1 |
from openai import AzureOpenAI
|
2 |
+
from langchain_openai import AzureChatOpenAI
|
3 |
import os
|
4 |
import ffmpeg
|
5 |
from typing import List
|
6 |
from moviepy.editor import VideoFileClip
|
7 |
import nltk
|
8 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
9 |
+
from langchain import HuggingFaceHub, PromptTemplate, LLMChain
|
10 |
import gradio as gr
|
11 |
from pytube import YouTube
|
12 |
import requests
|
|
|
16 |
nltk.download('stopwords')
|
17 |
|
18 |
|
19 |
+
|
20 |
class VideoAnalytics:
|
21 |
"""
|
22 |
Class for performing analytics on videos including transcription, summarization, topic generation,
|
|
|
33 |
# Initialize AzureOpenAI client
|
34 |
self.client = AzureOpenAI()
|
35 |
|
36 |
+
self.mistral_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
37 |
+
|
38 |
# Initialize transcribed text variable
|
39 |
self.transcribed_text = ""
|
40 |
|
|
|
42 |
self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
|
43 |
|
44 |
# Placeholder for Hugging Face API token
|
45 |
+
self.hf_token = "HF_TOKEN" # Replace this with the actual Hugging Face API token
|
46 |
|
47 |
# Set headers for API requests with Hugging Face token
|
48 |
+
self.headers = {"Authorization": f"Bearer {self.hf_token}"}
|
49 |
+
|
50 |
+
# Initialize english text variable
|
51 |
+
self.english_text = ""
|
52 |
+
|
53 |
+
self.openai_llm = AzureChatOpenAI(
|
54 |
+
deployment_name="ChatGPT",
|
55 |
+
)
|
56 |
+
|
57 |
|
58 |
# Configure logging settings
|
59 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
86 |
output = query(audio_file)
|
87 |
# Update the transcribed_text attribute with the transcription result
|
88 |
self.transcribed_text = output["text"]
|
89 |
+
# Update the translation text into english_text
|
90 |
+
self.english_text = self.translation()
|
91 |
# Return the transcribed text
|
92 |
return output["text"]
|
93 |
|
|
|
106 |
# Define a conversation between system and user
|
107 |
conversation = [
|
108 |
{"role": "system", "content": "You are a Summarizer"},
|
109 |
+
{"role": "user", "content": f"""summarize the following text delimited by triple backticks.Output must in english.
|
110 |
In two format of Outputs given below:
|
111 |
Abstractive Summary:
|
112 |
Extractive Summary:
|
113 |
+
```{self.english_text}```
|
114 |
"""}
|
115 |
]
|
116 |
# Generate completion using ChatGPT model
|
|
|
139 |
# Define a conversation between system and user
|
140 |
conversation = [
|
141 |
{"role": "system", "content": "You are a Topic Generator"},
|
142 |
+
{"role": "user", "content": f"""generate single Topics from the following text don't make sentence for topic generation,delimited by triple backticks.Output must in english.
|
143 |
list out the topics:
|
144 |
Topics:
|
145 |
+
```{self.english_text}```
|
146 |
+
"""}
|
147 |
+
]
|
148 |
+
# Generate completion using ChatGPT model
|
149 |
+
response = self.client.chat.completions.create(
|
150 |
+
model="ChatGPT",
|
151 |
+
messages=conversation,
|
152 |
+
temperature=0,
|
153 |
+
max_tokens=1000
|
154 |
+
)
|
155 |
+
# Get the generated topics message
|
156 |
+
message = response.choices[0].message.content
|
157 |
+
return message
|
158 |
+
except Exception as e:
|
159 |
+
logging.error(f"Error generating topics: {e}")
|
160 |
+
return ""
|
161 |
+
|
162 |
+
def translation(self) -> str:
|
163 |
+
"""
|
164 |
+
translation from the transcribed video.
|
165 |
+
|
166 |
+
Returns:
|
167 |
+
str: translation.
|
168 |
+
"""
|
169 |
+
try:
|
170 |
+
# Define a conversation between system and user
|
171 |
+
conversation = [
|
172 |
+
{"role": "system", "content": "You are a Multilingual Translator"},
|
173 |
+
{"role": "user", "content": f""" Translate the following text in English ,delimited by triple backticks.
|
174 |
```{self.transcribed_text}```
|
175 |
"""}
|
176 |
]
|
|
|
188 |
logging.error(f"Error generating topics: {e}")
|
189 |
return ""
|
190 |
|
191 |
+
def format_prompt(self, question: str, data: str) -> str:
    """
    Build the instruct-style prompt sent to the text-generation model.

    The result is the begin-of-sequence marker ``<s>``, followed by an
    ``[INST]`` block carrying the instructions, the data and the question,
    followed by a second ``[INST]`` block that repeats the question.

    Args:
        question (str): The user's question.
        data (str): The data to be analyzed.

    Returns:
        str: Formatted prompt.
    """
    # Keep the begin-of-sequence marker as its own piece and concatenate it;
    # rebinding the same variable on the next line would silently drop it.
    bos = "<s>"

    # Instruction block: task description, then the data and the question.
    instruction_block = (
        "[INST] you are the german language and universal language expert .your task is analyze the given data and user ask any question about given data answer to the user question.your returning answer must in user's language.otherwise reply i don't know.\n"
        f"data:{data}\n"
        f"question:{question}[/INST]"
    )

    # The question is repeated in a trailing instruction block.
    question_block = f"[INST] {question} [/INST]"

    return bos + instruction_block + question_block
|
209 |
+
|
210 |
+
|
211 |
+
def generate(self, prompt: str, transcribed_text: str, temperature=0.9, max_new_tokens=5000, top_p=0.95,
             repetition_penalty=1.0) -> str:
    """
    Answer a prompt about the transcribed text by streaming from the Mixtral client.

    Args:
        prompt (str): The user's prompt (question) to answer.
        transcribed_text (str): The text the answer should be based on.
        temperature (float): Sampling temperature; values below 0.01 are
            raised to 0.01. Default is 0.9.
        max_new_tokens (int): Maximum number of tokens to generate. Default is 5000.
        top_p (float): Nucleus sampling parameter. Default is 0.95.
        repetition_penalty (float): Penalty for repeating the same token. Default is 1.0.

    Returns:
        str: The concatenated streamed tokens with any "</s>" removed, or a
            fixed error message if anything raised along the way.
    """
    try:
        # Clamp the temperature to a small positive floor.
        sampling_temperature = max(float(temperature), 1e-2)

        sampling_options = {
            "temperature": sampling_temperature,
            "max_new_tokens": max_new_tokens,
            "top_p": float(top_p),
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
            "seed": 42,
        }

        # Wrap the question and the transcript in the model's prompt format.
        model_input = self.format_prompt(prompt, transcribed_text)

        # Request a token stream from the mistral client.
        token_stream = self.mistral_client.text_generation(
            model_input,
            **sampling_options,
            stream=True,
            details=True,
            return_full_text=False,
        )

        # Stitch the streamed token texts together and strip the end marker.
        generated = "".join(chunk.token.text for chunk in token_stream)
        return generated.replace("</s>", "")
    except Exception as e:
        logging.error(f"Error in text generation: {e}")
        return "An error occurred during text generation."
|
255 |
+
|
256 |
+
def video_qa(self, question: str, model: str) -> str:
    """
    Answer a question about the video text with the selected language model.

    Args:
        question (str): The question asked by the user.
        model (str): The language model to be used ("OpenAI" or "Mixtral").

    Returns:
        str: The model's answer; a notice asking for a valid model when
            ``model`` is neither "OpenAI" nor "Mixtral"; or a fixed error
            message if answering raised an exception.
    """
    try:
        if model == "OpenAI":
            template = (
                "you are the universal language expert .your task is analyze the given text and user ask any question about given text answer to the user question.otherwise reply i don't know.\n"
                "extracted_text:{text}\n"
                "user_question:{question}"
            )

            prompt = PromptTemplate(template=template, input_variables=["text", "question"])
            llm_chain = LLMChain(prompt=prompt, verbose=True, llm=self.openai_llm)

            # Run the language model chain
            return llm_chain.run({"text": self.english_text, "question": question})

        if model == "Mixtral":
            # Generate answer using Mixtral model
            return self.generate(question, self.english_text)

        # Any other value would otherwise fall through and return None,
        # violating the declared str return type. Presumably this happens
        # when nothing is selected in the model dropdown -- TODO confirm.
        logging.warning(f"Unsupported model for video question answering: {model!r}")
        return 'Please select a model: "OpenAI" or "Mixtral".'
    except Exception as e:
        logging.error(f"Error in video question answering: {e}")
        return "An error occurred during video question answering."
|
287 |
+
|
288 |
def extract_video_important_sentence(self) -> str:
|
289 |
"""
|
290 |
Extract important sentences from the transcribed video.
|
|
|
293 |
str: Extracted important sentences.
|
294 |
"""
|
295 |
try:
|
296 |
+
|
297 |
# Tokenize the sentences
|
298 |
+
sentences = nltk.sent_tokenize(self.english_text)
|
299 |
|
300 |
# Initialize TF-IDF vectorizer
|
301 |
tfidf_vectorizer = TfidfVectorizer()
|
|
|
313 |
sentence_rankings.sort(reverse=True)
|
314 |
|
315 |
# Set a threshold for selecting sentences
|
316 |
+
threshold = 2.5 # Adjust as needed
|
317 |
|
318 |
# Select sentences with scores above the threshold
|
319 |
selected_sentences = [sentence for score, sentence in sentence_rankings if score >= threshold]
|
|
|
374 |
logging.error(f"Error downloading video: {e}")
|
375 |
return ""
|
376 |
|
|
|
377 |
def main(self, video: str = None, input_path: str = None) -> tuple:
|
378 |
"""
|
379 |
Perform video analytics.
|
|
|
386 |
tuple: Summary, important sentences, and topics.
|
387 |
"""
|
388 |
try:
|
389 |
+
video = VideoFileClip(input_path)
|
390 |
+
duration = video.duration
|
391 |
+
video.close()
|
392 |
+
|
393 |
+
if round(duration) < 600:
|
394 |
+
# Download the video if input_path is provided, otherwise use the provided video path
|
395 |
+
if input_path:
|
396 |
+
input_path = self.Download(input_path)
|
397 |
+
text = self.transcribe_video(input_path)
|
398 |
+
elif video:
|
399 |
+
text = self.transcribe_video(video)
|
400 |
+
input_path = video
|
401 |
+
|
402 |
+
# Generate summary, important sentences, and topics
|
403 |
+
summary = self.generate_video_summary()
|
404 |
+
self.write_text_files(summary,"Summary")
|
405 |
+
important_sentences = self.extract_video_important_sentence()
|
406 |
+
self.write_text_files(important_sentences,"Important_Sentence")
|
407 |
+
topics = self.generate_topics()
|
408 |
+
self.write_text_files(topics,"Topics")
|
409 |
+
|
410 |
+
# Return the generated summary, important sentences, and topics
|
411 |
+
return summary,important_sentences,topics
|
412 |
+
else:
|
413 |
+
return "Video Duration Above 10 Minutes,Try Below 10 Minutes Video","",""
|
414 |
|
415 |
except Exception as e:
|
416 |
# Log any errors that occur during video analytics
|
|
|
441 |
Topics = gr.Textbox(show_label=False,lines=10)
|
442 |
with gr.Row():
|
443 |
topics_download = gr.DownloadButton(label="Download",value="Topics.txt",visible=True,size='lg',elem_classes="download_button")
|
444 |
+
with gr.Tab("Video QA"):
|
445 |
+
with gr.Row():
|
446 |
+
with gr.Coulumn(scale=0.70):
|
447 |
+
question = gr.Textbox(show_label=False,placeholder="Ask Your Questions...")
|
448 |
+
with gr.Coulumn(scale=0.30):
|
449 |
+
model = gr.Dropdown(["OpenAI", "Mixtral"], label="Models")
|
450 |
+
with gr.Row():
|
451 |
+
result = gr.Textbox(label='Answer',lines=10)
|
452 |
submit_btn.click(self.main,[video,yt_link],[summary,Important_Sentences,Topics])
|
453 |
+
question.submit(self.video_qa,[question,model],result)
|
454 |
demo.launch()
|
455 |
|
456 |
if __name__ == "__main__":
|