bofenghuang committed on
Commit
e7578b0
1 Parent(s): 1455f15
Files changed (2) hide show
  1. packages.txt +2 -2
  2. run_demo_openai.py +14 -14
packages.txt CHANGED
@@ -1,2 +1,2 @@
1
- ffmpeg
2
- git+https://github.com/openai/whisper.git
 
1
+ git+https://github.com/openai/whisper.git
2
+ ffmpeg
run_demo_openai.py CHANGED
@@ -12,9 +12,6 @@ import whisper
12
  warnings.filterwarnings("ignore")
13
  disable_progress_bar()
14
 
15
- MODEL_NAME = "bofenghuang/whisper-large-v2-cv11-french"
16
- CHECKPOINT_FILENAME = "checkpoint_openai.pt"
17
-
18
  logging.basicConfig(
19
  format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
20
  datefmt="%Y-%m-%dT%H:%M:%SZ",
@@ -22,14 +19,10 @@ logging.basicConfig(
22
  logger = logging.getLogger(__name__)
23
  logger.setLevel(logging.DEBUG)
24
 
25
- device = 0 if torch.cuda.is_available() else "cpu"
26
-
27
- downloaded_model_path = hf_hub_download(repo_id=MODEL_NAME, filename=CHECKPOINT_FILENAME)
28
-
29
- model = whisper.load_model(downloaded_model_path, device=device)
30
- logger.info(f"Model has been loaded on device `{device}`")
31
 
32
- gen_kwargs = {
33
  "task": "transcribe",
34
  "language": "fr",
35
  # "without_timestamps": True,
@@ -43,6 +36,14 @@ gen_kwargs = {
43
  # "no_speech_threshold": None,
44
  }
45
 
 
 
 
 
 
 
 
 
46
  def transcribe(microphone, file_upload):
47
  warn_output = ""
48
  if (microphone is not None) and (file_upload is not None):
@@ -56,7 +57,7 @@ def transcribe(microphone, file_upload):
56
 
57
  file = microphone if microphone is not None else file_upload
58
 
59
- text = model.transcribe(file, **gen_kwargs)["text"]
60
 
61
  logger.info(f"Transcription: {text}")
62
 
@@ -66,8 +67,7 @@ def transcribe(microphone, file_upload):
66
  def _return_yt_html_embed(yt_url):
67
  video_id = yt_url.split("?v=")[-1]
68
  HTML_str = (
69
- f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
70
- " </center>"
71
  )
72
  return HTML_str
73
 
@@ -78,7 +78,7 @@ def yt_transcribe(yt_url):
78
  stream = yt.streams.filter(only_audio=True)[0]
79
  stream.download(filename="audio.mp3")
80
 
81
- text = model.transcribe("audio.mp3", **gen_kwargs)["text"]
82
 
83
  logger.info(f'Transcription of "{yt_url}": {text}')
84
 
 
12
  warnings.filterwarnings("ignore")
13
  disable_progress_bar()
14
 
 
 
 
15
  logging.basicConfig(
16
  format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
17
  datefmt="%Y-%m-%dT%H:%M:%SZ",
 
19
  logger = logging.getLogger(__name__)
20
  logger.setLevel(logging.DEBUG)
21
 
22
+ MODEL_NAME = "bofenghuang/whisper-large-v2-cv11-french"
23
+ CHECKPOINT_FILENAME = "checkpoint_openai.pt"
 
 
 
 
24
 
25
+ GEN_KWARGS = {
26
  "task": "transcribe",
27
  "language": "fr",
28
  # "without_timestamps": True,
 
36
  # "no_speech_threshold": None,
37
  }
38
 
39
+ device = 0 if torch.cuda.is_available() else "cpu"
40
+
41
+ downloaded_model_path = hf_hub_download(repo_id=MODEL_NAME, filename=CHECKPOINT_FILENAME)
42
+
43
+ model = whisper.load_model(downloaded_model_path, device=device)
44
+ logger.info(f"Model has been loaded on device `{device}`")
45
+
46
+
47
  def transcribe(microphone, file_upload):
48
  warn_output = ""
49
  if (microphone is not None) and (file_upload is not None):
 
57
 
58
  file = microphone if microphone is not None else file_upload
59
 
60
+ text = model.transcribe(file, **GEN_KWARGS)["text"]
61
 
62
  logger.info(f"Transcription: {text}")
63
 
 
67
  def _return_yt_html_embed(yt_url):
68
  video_id = yt_url.split("?v=")[-1]
69
  HTML_str = (
70
+ f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>' " </center>"
 
71
  )
72
  return HTML_str
73
 
 
78
  stream = yt.streams.filter(only_audio=True)[0]
79
  stream.download(filename="audio.mp3")
80
 
81
+ text = model.transcribe("audio.mp3", **GEN_KWARGS)["text"]
82
 
83
  logger.info(f'Transcription of "{yt_url}": {text}')
84