Spaces:
Build error
Build error
BilalSardar
committed on
Commit
·
46060b5
1
Parent(s):
3d68af8
Update app.py
Browse files
app.py
CHANGED
@@ -37,26 +37,26 @@ warnings.filterwarnings("ignore")
|
|
37 |
# Models #
|
38 |
###############################################
|
39 |
|
40 |
-
summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
|
41 |
-
summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
|
42 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
43 |
-
summary_model = summary_model.to(device)
|
44 |
|
45 |
|
46 |
-
glove_file = 'glove.6B.300d.txt'
|
47 |
-
tmp_file = 'word2vec-glove.6B.300d.txt'
|
48 |
-
glove2word2vec(glove_file, tmp_file)
|
49 |
-
model = KeyedVectors.load_word2vec_format(tmp_file)
|
50 |
|
51 |
|
52 |
-
question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
|
53 |
-
question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
|
54 |
-
question_model = question_model.to(device)
|
55 |
|
56 |
###############################################
|
57 |
def Process_audio(fileName):
|
58 |
text=''
|
59 |
-
txtf=
|
60 |
myaudio=AudioSegment.from_wav(fileName)
|
61 |
chunks_length_ms=8000
|
62 |
chunks=make_chunks(myaudio,chunks_length_ms)
|
@@ -71,7 +71,7 @@ def Process_audio(fileName):
|
|
71 |
|
72 |
try:
|
73 |
rec=r.recognize_google(audio_listened)
|
74 |
-
txtf.
|
75 |
text+=rec+"."
|
76 |
except sr.UnknownValueError:
|
77 |
print("I dont recognize your audio")
|
@@ -85,12 +85,14 @@ except:
|
|
85 |
|
86 |
def UrlToAudio(VideoUrl):
|
87 |
url=VideoUrl
|
|
|
88 |
os.system("yt-dlp -x --audio-format wav " + url)
|
89 |
# load audio and pad/trim it to fit 30 seconds
|
90 |
base_path = Path(r"")
|
91 |
for wav_file_path in base_path.glob("*.wav"):
|
92 |
-
Process_audio(str(wav_file_path))
|
93 |
break
|
|
|
94 |
|
95 |
def set_seed(seed: int):
|
96 |
random.seed(seed)
|
@@ -251,9 +253,9 @@ radiobutton = gr.Radio(["Wordnet", "Gensim"])
|
|
251 |
|
252 |
def generate_question(context1,radiobutton):
|
253 |
# try:
|
254 |
-
|
255 |
-
f = open("The_audio.txt", "w+")
|
256 |
-
context=f.read()
|
257 |
summary_text = summarizer(context,summary_model,summary_tokenizer)
|
258 |
for wrp in wrap(summary_text, 150):
|
259 |
print (wrp)
|
|
|
37 |
# Models #
|
38 |
###############################################
|
39 |
|
40 |
+
# summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
|
41 |
+
# summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
|
42 |
+
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
43 |
+
# summary_model = summary_model.to(device)
|
44 |
|
45 |
|
46 |
+
# glove_file = 'glove.6B.300d.txt'
|
47 |
+
# tmp_file = 'word2vec-glove.6B.300d.txt'
|
48 |
+
# glove2word2vec(glove_file, tmp_file)
|
49 |
+
# model = KeyedVectors.load_word2vec_format(tmp_file)
|
50 |
|
51 |
|
52 |
+
# question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
|
53 |
+
# question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
|
54 |
+
# question_model = question_model.to(device)
|
55 |
|
56 |
###############################################
|
57 |
def Process_audio(fileName):
|
58 |
text=''
|
59 |
+
txtf=[]
|
60 |
myaudio=AudioSegment.from_wav(fileName)
|
61 |
chunks_length_ms=8000
|
62 |
chunks=make_chunks(myaudio,chunks_length_ms)
|
|
|
71 |
|
72 |
try:
|
73 |
rec=r.recognize_google(audio_listened)
|
74 |
+
txtf.append(rec+".")
|
75 |
text+=rec+"."
|
76 |
except sr.UnknownValueError:
|
77 |
print("I dont recognize your audio")
|
|
|
85 |
|
86 |
def UrlToAudio(VideoUrl):
|
87 |
url=VideoUrl
|
88 |
+
text=[]
|
89 |
os.system("yt-dlp -x --audio-format wav " + url)
|
90 |
# load audio and pad/trim it to fit 30 seconds
|
91 |
base_path = Path(r"")
|
92 |
for wav_file_path in base_path.glob("*.wav"):
|
93 |
+
text.append(Process_audio(str(wav_file_path)))
|
94 |
break
|
95 |
+
return ''.join(text)
|
96 |
|
97 |
def set_seed(seed: int):
|
98 |
random.seed(seed)
|
|
|
253 |
|
254 |
def generate_question(context1,radiobutton):
|
255 |
# try:
|
256 |
+
context=UrlToAudio(context1)
|
257 |
+
# f = open("The_audio.txt", "w+")
|
258 |
+
# context=f.read()
|
259 |
summary_text = summarizer(context,summary_model,summary_tokenizer)
|
260 |
for wrp in wrap(summary_text, 150):
|
261 |
print (wrp)
|