whisper-demo-french

Sleeping

App Files Files Community

bofenghuang committed on Feb 13, 2023

Commit

e7578b0

•

1 Parent(s): 1455f15

up

Browse files

Files changed (2) hide show

packages.txt +2 -2
run_demo_openai.py +14 -14

packages.txt CHANGED Viewed

	@@ -1,2 +1,2 @@
1	- ~~ffmpeg~~
2	- ~~git+https://github.com/openai/whisper.git~~


1	+ git+https://github.com/openai/whisper.git
2	+ ffmpeg

run_demo_openai.py CHANGED Viewed

@@ -12,9 +12,6 @@ import whisper
 warnings.filterwarnings("ignore")
 disable_progress_bar()
-MODEL_NAME = "bofenghuang/whisper-large-v2-cv11-french"
-CHECKPOINT_FILENAME = "checkpoint_openai.pt"
 logging.basicConfig(
     format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
     datefmt="%Y-%m-%dT%H:%M:%SZ",
@@ -22,14 +19,10 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
-device = 0 if torch.cuda.is_available() else "cpu"
-downloaded_model_path = hf_hub_download(repo_id=MODEL_NAME, filename=CHECKPOINT_FILENAME)
-model = whisper.load_model(downloaded_model_path, device=device)
-logger.info(f"Model has been loaded on device `{device}`")
-gen_kwargs = {
     "task": "transcribe",
     "language": "fr",
     # "without_timestamps": True,
@@ -43,6 +36,14 @@ gen_kwargs = {
     # "no_speech_threshold": None,
 }
 def transcribe(microphone, file_upload):
     warn_output = ""
     if (microphone is not None) and (file_upload is not None):
@@ -56,7 +57,7 @@ def transcribe(microphone, file_upload):
     file = microphone if microphone is not None else file_upload
-    text = model.transcribe(file, **gen_kwargs)["text"]
     logger.info(f"Transcription: {text}")
@@ -66,8 +67,7 @@ def transcribe(microphone, file_upload):
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
-        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-        " </center>"
     )
     return HTML_str
@@ -78,7 +78,7 @@ def yt_transcribe(yt_url):
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename="audio.mp3")
-    text = model.transcribe("audio.mp3", **gen_kwargs)["text"]
     logger.info(f'Transcription of "{yt_url}": {text}')

 warnings.filterwarnings("ignore")
 disable_progress_bar()
 logging.basicConfig(
     format="%(asctime)s [%(levelname)s] [%(name)s] %(message)s",
     datefmt="%Y-%m-%dT%H:%M:%SZ",
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
+MODEL_NAME = "bofenghuang/whisper-large-v2-cv11-french"
+CHECKPOINT_FILENAME = "checkpoint_openai.pt"
+GEN_KWARGS = {
     "task": "transcribe",
     "language": "fr",
     # "without_timestamps": True,
     # "no_speech_threshold": None,
 }
+device = 0 if torch.cuda.is_available() else "cpu"
+downloaded_model_path = hf_hub_download(repo_id=MODEL_NAME, filename=CHECKPOINT_FILENAME)
+model = whisper.load_model(downloaded_model_path, device=device)
+logger.info(f"Model has been loaded on device `{device}`")
 def transcribe(microphone, file_upload):
     warn_output = ""
     if (microphone is not None) and (file_upload is not None):
     file = microphone if microphone is not None else file_upload
+    text = model.transcribe(file, **GEN_KWARGS)["text"]
     logger.info(f"Transcription: {text}")
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
+        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>' " </center>"
     )
     return HTML_str
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename="audio.mp3")
+    text = model.transcribe("audio.mp3", **GEN_KWARGS)["text"]
     logger.info(f'Transcription of "{yt_url}": {text}')