Spaces:
Sleeping
Sleeping
Ragnov
committed on
Commit
•
af969af
1
Parent(s):
ea1d6e2
update the model by adding trained GEC
Browse files- app.py +105 -28
- requirements.txt +1 -0
app.py
CHANGED
@@ -2,18 +2,19 @@
|
|
2 |
from pytube import YouTube
|
3 |
import whisper
|
4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
|
|
|
|
7 |
|
8 |
# Functions
|
9 |
def transcribe(file):
|
10 |
options = dict(task="transcribe", best_of=5)
|
11 |
-
text =
|
12 |
-
return text.strip()
|
13 |
-
|
14 |
-
def translate(file):
|
15 |
-
options = dict(task="translate", best_of=5)
|
16 |
-
text = model.transcribe(file, **options)["text"]
|
17 |
return text.strip()
|
18 |
|
19 |
def get_filename(file_obj):
|
@@ -23,7 +24,7 @@ def inference(link):
|
|
23 |
yt = YouTube(link)
|
24 |
path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
|
25 |
options = whisper.DecodingOptions(without_timestamps=True)
|
26 |
-
results =
|
27 |
return results['text']
|
28 |
|
29 |
def populate_metadata(link):
|
@@ -33,9 +34,62 @@ def populate_metadata(link):
|
|
33 |
def transcribe_file(file):
|
34 |
options = dict(task="transcribe", best_of=5)
|
35 |
file = get_filename(file)
|
36 |
-
text =
|
37 |
return text.strip()
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Gradio Blocks
|
40 |
demo = gr.Blocks()
|
41 |
with demo:
|
@@ -43,16 +97,24 @@ with demo:
|
|
43 |
with gr.Tabs():
|
44 |
with gr.TabItem("Voice Record"):
|
45 |
with gr.Row():
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
50 |
with gr.TabItem("Upload File"):
|
51 |
with gr.Row():
|
52 |
file_upload = gr.File()
|
53 |
text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
56 |
with gr.TabItem("Youtube Link"):
|
57 |
with gr.Box():
|
58 |
link = gr.Textbox(label="YouTube Link")
|
@@ -62,13 +124,17 @@ with demo:
|
|
62 |
text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output",lines=5)
|
63 |
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
64 |
transcribe_button3 = gr.Button("Transcribe")
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
<p style="text-align: center;"> Thesis System presented by <br/> <br/>
|
70 |
-
• <b>Jomari A. Buenaobra</b> <br/>
|
71 |
-
• <b>Christian G. Eslit</b> <br/>
|
72 |
• <b>Daniel L. Espinola</b> <br/>
|
73 |
• <b>Jhon Vincent A. Gupo</b> <br/>
|
74 |
• <b>Ryan M. Ibay</b> <br/> <br/>
|
@@ -77,13 +143,24 @@ with demo:
|
|
77 |
Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
|
78 |
We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
|
79 |
• <b>Crisanto F. Gulay</b> - Adviser <br/>
|
80 |
-
• <b>
|
81 |
</p>
|
82 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
demo.launch()
|
|
|
2 |
from pytube import YouTube
|
3 |
import whisper
|
4 |
import gradio as gr
|
5 |
+
import time
|
6 |
+
import re
|
7 |
+
from happytransformer import HappyTextToText, TTSettings
|
8 |
+
from difflib import Differ
|
9 |
|
10 |
+
STTmodel = whisper.load_model("base.en")
|
11 |
+
GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
|
12 |
+
args = TTSettings(num_beams=5, min_length=1)
|
13 |
|
14 |
# Functions
|
15 |
def transcribe(file):
    """Transcribe a recorded audio file to text with the Whisper model.

    Args:
        file: Path of the audio file to transcribe. May be None or empty
            when the user has not recorded anything yet.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no audio was supplied.
    """
    # Pressing "Transcribe" before recording supplies no path; return early
    # rather than letting the model fail on a missing file.
    if not file:
        return ""

    options = dict(task="transcribe", best_of=5)
    text = STTmodel.transcribe(file, **options)["text"]
    return text.strip()
|
19 |
|
20 |
def get_filename(file_obj):
|
|
|
24 |
yt = YouTube(link)
|
25 |
path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
|
26 |
options = whisper.DecodingOptions(without_timestamps=True)
|
27 |
+
results = STTmodel.transcribe(path)
|
28 |
return results['text']
|
29 |
|
30 |
def populate_metadata(link):
|
|
|
34 |
def transcribe_file(file):
    """Transcribe an uploaded audio file to text with the Whisper model.

    Args:
        file: The uploaded file object from the file-upload component; it is
            resolved to a path via ``get_filename``. May be None when
            nothing has been uploaded.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no file was supplied.
    """
    # No upload yet: there is nothing to resolve to a path or to transcribe.
    if file is None:
        return ""

    options = dict(task="transcribe", best_of=5)
    file = get_filename(file)
    text = STTmodel.transcribe(file, **options)["text"]
    return text.strip()
|
39 |
|
40 |
+
def real_time_transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Audio input handed to ``STTmodel.transcribe``.
        state: Transcript accumulated so far (defaults to an empty string).

    Returns:
        tuple: The updated transcript twice, as ``(transcript, transcript)``.
    """
    # Fixed two-second pause before each transcription pass.
    time.sleep(2)

    chunk = STTmodel.transcribe(audio)["text"]
    suffix = chunk + " "
    transcript = state + suffix
    return transcript, transcript
|
45 |
+
|
46 |
+
# Sentence boundary: one or more spaces that follow sentence-ending
# punctuation and precede an uppercase letter. Compiled once at import time.
_SENTENCE_BOUNDARY = re.compile(
    r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])'
)


def paragraph_to_sentences(paragraph):
    """Split a paragraph into a list of sentences.

    Args:
        paragraph (str): The paragraph to be converted to a list of
            sentences. None or an empty string is treated as "no text".

    Returns:
        list: The sentences extracted from the paragraph, each stripped of
        leading and trailing whitespace. Blank fragments are omitted, so
        empty or whitespace-only input yields an empty list.
    """
    if not paragraph:
        return []

    # Split on the spaces between a sentence terminator and the capital
    # letter that starts the next sentence.
    pieces = _SENTENCE_BOUNDARY.split(paragraph)

    # Strip each sentence and drop blanks so callers never receive an
    # empty sentence to process.
    return [piece.strip() for piece in pieces if piece.strip()]
|
63 |
+
|
64 |
+
def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results into a paragraph.

    Args:
        sentences (list): Sentences to run through the grammar model, each
            sent with the "grammar: " prefix.

    Returns:
        str: The corrected sentences separated by single spaces; an empty
        string when ``sentences`` is empty.
    """
    corrected = [
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    ]
    return " ".join(corrected)
|
73 |
+
|
74 |
+
# Takes the transcribed result and grammar-corrects ("gramifies") it
|
75 |
+
def gramify(paragraph):
    """Return a grammar-corrected version of ``paragraph``.

    The paragraph is split into sentences, each sentence is corrected, and
    the corrected sentences are joined back into one string.
    """
    return sentences_to_paragraph(paragraph_to_sentences(paragraph))
|
79 |
+
|
80 |
+
# Function that diffs the transcribed text against its grammatically corrected version
|
81 |
+
def diff_texts(text1, text2):
    """Compute the difference between two texts for highlighted display.

    Args:
        text1: The transcribed text.
        text2: The grammatically corrected text.

    Returns:
        list: ``(item, tag)`` tuples in ``Differ.compare`` order, where
        ``tag`` is the entry's leading marker (e.g. "+" or "-") or None
        when the marker is a space, i.e. the item is unchanged.
    """
    highlighted = []
    for entry in Differ().compare(text1, text2):
        # Each entry is a one-character marker, a space, then the item.
        marker = entry[0]
        highlighted.append((entry[2:], None if marker == " " else marker))
    return highlighted
|
92 |
+
res_diff = []
|
93 |
# Gradio Blocks
|
94 |
demo = gr.Blocks()
|
95 |
with demo:
|
|
|
97 |
with gr.Tabs():
|
98 |
with gr.TabItem("Voice Record"):
|
99 |
with gr.Row():
|
100 |
+
audio = gr.Audio(show_label=False,source="microphone",type="filepath")
|
101 |
+
text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
|
102 |
+
with gr.Row():
|
103 |
+
transcribe_button1 = gr.Button("Transcribe")
|
104 |
+
with gr.Row():
|
105 |
+
Grammar_text_output1 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
|
106 |
+
with gr.Row():
|
107 |
+
Diff_text_output1 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
|
108 |
with gr.TabItem("Upload File"):
|
109 |
with gr.Row():
|
110 |
file_upload = gr.File()
|
111 |
text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
|
112 |
+
with gr.Row():
|
113 |
+
transcribe_button2 = gr.Button("Transcribe")
|
114 |
+
with gr.Row():
|
115 |
+
Grammar_text_output2 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
|
116 |
+
with gr.Row():
|
117 |
+
Diff_text_output2 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
|
118 |
with gr.TabItem("Youtube Link"):
|
119 |
with gr.Box():
|
120 |
link = gr.Textbox(label="YouTube Link")
|
|
|
124 |
text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output",lines=5)
|
125 |
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
126 |
transcribe_button3 = gr.Button("Transcribe")
|
127 |
+
with gr.Row():
|
128 |
+
Grammar_text_output3 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
|
129 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
130 |
+
Diff_text_output3 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
|
131 |
+
gr.Markdown("""<p style="text-align: center;"> Not Satisfied with the result? </br>
|
132 |
+
<a href="https://forms.gle/yZA5DBygMUNmLZtv7">Click here to help us make it better.</a>
|
133 |
+
</p>""")
|
134 |
+
|
135 |
+
with gr.Accordion("About",open=False):
|
136 |
+
gr.Markdown("""
|
137 |
<p style="text-align: center;"> Thesis System presented by <br/> <br/>
|
|
|
|
|
138 |
• <b>Daniel L. Espinola</b> <br/>
|
139 |
• <b>Jhon Vincent A. Gupo</b> <br/>
|
140 |
• <b>Ryan M. Ibay</b> <br/> <br/>
|
|
|
143 |
Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
|
144 |
We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
|
145 |
• <b>Crisanto F. Gulay</b> - Adviser <br/>
|
146 |
+
• <b>Gene Marck B. Catedrilla</b> - Subject Specialist <br/>
|
147 |
</p>
|
148 |
""")
|
149 |
+
link.change(populate_metadata, inputs=[link], outputs=[img, title])
|
150 |
+
|
151 |
+
# Transcription
|
152 |
+
transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
|
153 |
+
transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
|
154 |
+
transcribe_button3.click(inference, inputs=link, outputs=text_link_output)
|
155 |
+
|
156 |
+
# Gramify
|
157 |
+
text_output1.change(gramify,inputs=text_output1,outputs=Grammar_text_output1)
|
158 |
+
text_output2.change(gramify,inputs=text_output2,outputs=Grammar_text_output2)
|
159 |
+
text_link_output.change(gramify, inputs=text_link_output ,outputs=Grammar_text_output3)
|
160 |
|
161 |
+
# For Text Difference
|
162 |
+
Grammar_text_output1.change(diff_texts,inputs=[text_output1,Grammar_text_output1],outputs=Diff_text_output1)
|
163 |
+
Grammar_text_output2.change(diff_texts,inputs=[text_output2,Grammar_text_output2],outputs=Diff_text_output2)
|
164 |
+
Grammar_text_output3.change(diff_texts,inputs=[text_link_output,Grammar_text_output3],outputs=Diff_text_output3)
|
165 |
+
|
166 |
+
demo.launch(share=True)
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
pytube==12.1.2
|
2 |
gradio
|
3 |
git+https://github.com/openai/whisper.git
|
|
|
|
1 |
pytube==12.1.2
|
2 |
gradio
|
3 |
git+https://github.com/openai/whisper.git
|
4 |
+
happytransformer
|