gamingflexer committed · 23f8016
Parent(s): eeea261

Add dependencies and update API key handling

Files changed:
- requirements.txt (+4 -1)
- src/app_utils.py (+2 -6)
- src/audio_text.py (+17 -17)
requirements.txt
CHANGED
@@ -1,2 +1,5 @@
 gradio
-pandas
+pandas
+soundfile
+langchain==0.1.6
+openai
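
The new dependencies line up with the code changes below: soundfile handles audio file I/O, and the openai package provides the client used in src/audio_text.py; langchain is pinned to 0.1.6 while the other packages are left unpinned. As a routine setup step (not part of this commit), the Space or a local checkout installs them with:

pip install -r requirements.txt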
src/app_utils.py
CHANGED
@@ -1,13 +1,9 @@
 from textwrap import dedent
-import base64
-import requests
 from openai import OpenAI
-import os
 from decouple import config
-import json
-
-OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
+import json,os
 
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 
 voice_edit = dedent("""
 ### Instruction:
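
Note that os.environ.get("OPENAI_API_KEY") returns None when the variable is unset, whereas the removed config('OPENAI_API_KEY', default="") lookup fell back to a .env value or an empty string. A defensive variant (an illustrative sketch, not code from this commit) would keep the .env fallback and fail fast:

import os
from decouple import config

# Prefer the process environment, fall back to python-decouple's .env lookup
# (the pattern the old code used), and refuse to continue with a missing key
# rather than letting None leak into later os.environ assignments.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or config("OPENAI_API_KEY", default="")
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is not set in the environment or a .env file")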
src/audio_text.py
CHANGED
@@ -6,7 +6,7 @@ from openai import OpenAI
 from decouple import config
 import os
 
-OPENAI_API_KEY =
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 client = OpenAI()
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
 
@@ -18,22 +18,22 @@ os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
 
 
 
-def whisper_pipeline(audio_path):
-    model = whisper.load_model("medium")
-    # load audio and pad/trim it to fit 30 seconds
-    audio = whisper.load_audio(audio_path)
-    audio = whisper.pad_or_trim(audio)
-    # make log-Mel spectrogram and move to the same device as the model
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-    # detect the spoken language
-    _, probs = model.detect_language(mel)
-    print(f"Detected language: {max(probs, key=probs.get)}")
-    # decode the audio
-    options = whisper.DecodingOptions()
-    result = whisper.decode(model, mel, options)
-    # print the recognized text
-    print(result.text)
-    return result.text
+# def whisper_pipeline(audio_path):
+#     model = whisper.load_model("medium")
+#     # load audio and pad/trim it to fit 30 seconds
+#     audio = whisper.load_audio(audio_path)
+#     audio = whisper.pad_or_trim(audio)
+#     # make log-Mel spectrogram and move to the same device as the model
+#     mel = whisper.log_mel_spectrogram(audio).to(model.device)
+#     # detect the spoken language
+#     _, probs = model.detect_language(mel)
+#     print(f"Detected language: {max(probs, key=probs.get)}")
+#     # decode the audio
+#     options = whisper.DecodingOptions()
+#     result = whisper.decode(model, mel, options)
+#     # print the recognized text
+#     print(result.text)
+#     return result.text
 
 def whisper_openai(audio_path):
     audio_file= open(audio_path, "rb")
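
With the local whisper_pipeline now commented out, transcription is left to the hosted API via whisper_openai, of which the hunk only shows the opening line. A plausible completion (a hedged sketch using the openai v1 client, not the file's actual body) would post the opened file to the whisper-1 transcription endpoint:

def whisper_openai(audio_path):
    # Open the audio file in binary mode (the diff uses a bare open();
    # a context manager is used here so the handle is closed after the call).
    with open(audio_path, "rb") as audio_file:
        # Hypothetical remainder: send the file to OpenAI's hosted Whisper model.
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcription.text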