AndrewRWilliams committed on
Commit
89d17ad
1 Parent(s): 4a2cf48

file.orig_name for audio?

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -1,30 +1,47 @@
1
  import gradio as gr
2
  import os
 
3
 
4
  import whisper
5
  from whisper.utils import write_srt
6
 
7
  model = whisper.load_model("base")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def transcribe(file):
    """Transcribe an audio file with Whisper and save the results to disk.

    Two files are written relative to the current working directory, both
    named after the final path component of *file*:
    ``<basename>-transcript.txt`` holds the plain-text transcript and
    ``<basename>-subs.srt`` holds the subtitles.

    Returns a list with the subtitle file name first and the transcript
    file name second.
    """
    result = model.transcribe(file)

    stem = os.path.basename(file)
    transcript_path = stem + "-transcript.txt"
    subtitles_path = stem + "-subs.srt"

    # Plain-text transcript.
    with open(transcript_path, 'w', encoding="utf-8") as transcript_file:
        transcript_file.write(result['text'])

    # SRT subtitles built from the timed segments.
    with open(subtitles_path, 'w', encoding="utf-8") as subtitles_file:
        write_srt(result["segments"], file=subtitles_file)

    return [subtitles_path, transcript_path]
25
 
26
  iface = gr.Interface(
27
- title = 'Whisper transcription and subtitles from file.',
28
  fn=transcribe,
29
  inputs=[
30
  gr.inputs.Audio(source="upload", type="filepath", label="Upload Audio")
 
1
  import gradio as gr
2
  import os
3
+ import unicodedata
4
 
5
  import whisper
6
  from whisper.utils import write_srt
7
 
8
  model = whisper.load_model("base")
9
 
def slugify(value, allow_unicode=False):
    """Return a filesystem/URL-friendly slug built from *value*.

    Taken from https://github.com/django/django/blob/master/django/utils/text.py

    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.

    Args:
        value: Object to slugify; it is converted with ``str()`` first.
        allow_unicode: Keep non-ASCII word characters (NFKC-normalized)
            instead of transliterating/dropping them.

    Returns:
        The slug as a ``str``; empty if nothing usable remains.
    """
    # The module never imports `re`, so import it here to keep this
    # function self-contained instead of failing with NameError.
    import re

    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # NFKD splits accented letters into base letter + combining mark;
        # the ASCII round-trip then discards the marks.
        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')
25
+
def transcribe(file):
    """Transcribe an audio file with Whisper and save the results to disk.

    Two files are written relative to the current working directory:
    ``<name>-transcript.txt`` (plain text) and ``<name>-subs.srt``
    (subtitles). ``<name>`` is the upload's original file name when *file*
    exposes an ``orig_name`` attribute, otherwise the final path component
    of *file*.

    Args:
        file: The audio input forwarded to ``model.transcribe``. The
            interface declares ``type="filepath"``, so this is presumably
            a ``str`` path; an object carrying ``orig_name`` is also
            accepted for naming the outputs.

    Returns:
        A list with the subtitle file name first and the transcript file
        name second.
    """
    result = model.transcribe(file)

    # A plain path string has no `orig_name`; accessing it directly raised
    # AttributeError. Fall back to the path itself in that case.
    orig_name = getattr(file, "orig_name", None)
    source_name = str(orig_name) if orig_name is not None else file
    # Keep only the last path component so a client-supplied name cannot
    # place the output files outside the working directory.
    stem = os.path.basename(source_name)

    transcript_path = stem + "-transcript.txt"
    subtitles_path = stem + "-subs.srt"

    # Plain-text transcript.
    with open(transcript_path, 'w', encoding="utf-8") as f:
        f.write(result['text'])

    # SRT subtitles built from the timed segments.
    with open(subtitles_path, 'w', encoding="utf-8") as srt:
        write_srt(result["segments"], file=srt)

    return [subtitles_path, transcript_path]
42
 
43
  iface = gr.Interface(
44
+ title = 'Whisper transcription and subtitles from file',
45
  fn=transcribe,
46
  inputs=[
47
  gr.inputs.Audio(source="upload", type="filepath", label="Upload Audio")