|
""" |
|
Speech-to-text module based on Whisper for SillyTavern Extras |
|
- Whisper github: https://github.com/openai/whisper |
|
|
|
Authors: |
|
- Tony Ribeiro (https://github.com/Tony-sama) |
|
|
|
Models are saved into the user cache folder, for example: C:/Users/toto/.cache/whisper
|
|
|
References: |
|
- Code adapted from: |
|
- whisper github: https://github.com/openai/whisper |
|
- oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui |
|
""" |
|
from flask import jsonify, abort, request |
|
|
|
import whisper |
|
|
|
# Tag placed in front of this module's console messages and HTTP error descriptions.
DEBUG_PREFIX = "<stt whisper module>"

# File the uploaded audio is written to before being handed to Whisper.
# The same path is reused for every request.
RECORDING_FILE_PATH = "stt_test.wav"

# Whisper model used by process_audio(); while it is None, process_audio()
# returns an empty string instead of transcribing.
# NOTE(review): presumably assigned by the host application with the result of
# load_model() -- confirm against the caller.
model = None
|
|
|
def load_model(file_path=None):
    """
    Load a Whisper model and return it.

    When file_path is None the "base.en" model is requested; otherwise
    file_path is forwarded unchanged to whisper.load_model.

    Downloaded models go to the user cache folder, example: C:/Users/toto/.cache/whisper
    """
    model_source = "base.en" if file_path is None else file_path
    return whisper.load_model(model_source)
|
|
|
def process_audio():
    """
    Transcribe the audio file uploaded with the current request using Whisper.

    The upload is read from the 'AudioFile' field of the request, written to
    RECORDING_FILE_PATH and then passed to the module-level model.

    Returns:
        An empty string when the model has not been initialized yet,
        otherwise a JSON response of the form {"transcript": <text>}.

    Aborts:
        400 when the request carries no 'AudioFile' upload.
        500 when saving or transcribing the audio fails.
    """
    if model is None:
        print(DEBUG_PREFIX, "Whisper model not initialized yet.")
        return ""

    # Validate the upload before entering the try block: abort() works by
    # raising an exception, which the broad handler below would otherwise
    # catch and report as a 500 instead of a client error.
    file = request.files.get('AudioFile')
    if file is None:
        print(DEBUG_PREFIX, "No 'AudioFile' found in request.")
        abort(400, DEBUG_PREFIX + " No 'AudioFile' found in request")

    try:
        # NOTE(review): every request writes to the same RECORDING_FILE_PATH,
        # so requests handled concurrently would overwrite each other's audio.
        file.save(RECORDING_FILE_PATH)

        result = model.transcribe(RECORDING_FILE_PATH)
        transcript = result["text"]
        print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)

        return jsonify({"transcript": transcript})

    except Exception as e:
        # Best-effort boundary: log the cause and answer with a generic 500.
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while processing audio")