add support for query to be a string or an array of sentences that need encoding
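
In short: `query` previously had to be a single string; it may now also be a pre-built list of sentence dicts, which is passed straight through to `encode_sentences`. A minimal usage sketch — it assumes the handler exposes the usual `__call__(self, data)` entry point (consistent with `data.pop(...)` in the diff) and that `query` is a top-level key of the payload; neither is shown in full here:

from handler import EndpointHandler

handler = EndpointHandler()

# Before this commit: `query` had to be a single string.
result = handler({"query": "what model encodes the transcripts?"})

# After this commit: an array of sentences is accepted too and passed
# straight through to encode_sentences. The element shape mirrors the
# dict that the string case is wrapped into.
result = handler({"query": [
    {"text": "what model encodes the transcripts?", "id": "q1"},
    {"text": "which whisper checkpoint is loaded?", "id": "q2"},
]})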
handler.py  +9 -7
@@ -9,23 +9,24 @@ import time
 
 
 class EndpointHandler():
+    # load the model
+    WHISPER_MODEL_NAME = "tiny.en"
+    SENTENCE_TRANSFORMER_MODEL_NAME = "multi-qa-mpnet-base-dot-v1"
+
     def __init__(self, path=""):
-        # load the model
-        WHISPER_MODEL_NAME = "tiny.en"
-        SENTENCE_TRANSFORMER_MODEL_NAME = "multi-qa-mpnet-base-dot-v1"
 
         device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f'whisper will use: {device}')
 
         t0 = time.time()
-        self.whisper_model = whisper.load_model(WHISPER_MODEL_NAME).to(device)
+        self.whisper_model = whisper.load_model(self.WHISPER_MODEL_NAME).to(device)
         t1 = time.time()
 
         total = t1 - t0
         print(f'Finished loading whisper_model in {total} seconds')
 
         t0 = time.time()
-        self.sentence_transformer_model = SentenceTransformer(SENTENCE_TRANSFORMER_MODEL_NAME)
+        self.sentence_transformer_model = SentenceTransformer(self.SENTENCE_TRANSFORMER_MODEL_NAME)
         t1 = time.time()
 
         total = t1 - t0
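
The model names move from `__init__` locals to class attributes, so every method can reach them through `self`; the `transcription_source` tag in the next hunk depends on that. A stripped-down sketch of the attribute-lookup pattern (illustrative class, not the real handler):

class Sketch:
    # Class attribute: defined once on the class, readable from any method.
    WHISPER_MODEL_NAME = "tiny.en"

    def source_tag(self) -> str:
        # `self.WHISPER_MODEL_NAME` falls back to the class attribute
        # because no instance attribute shadows it.
        return f"whisper_{self.WHISPER_MODEL_NAME}"

print(Sketch().source_tag())  # -> whisper_tiny.en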
@@ -50,6 +51,7 @@ class EndpointHandler():
         encoded_segments = {}
         if video_url:
             video_with_transcript = self.transcribe_video(video_url)
+            video_with_transcript['transcript']['transcription_source'] = f"whisper_{self.WHISPER_MODEL_NAME}"
             encode_transcript = data.pop("encode_transcript", True)
             if encode_transcript:
                 encoded_segments = self.combine_transcripts(video_with_transcript)
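
The added line records which model produced the transcript. Assuming `transcribe_video` (not part of this diff) returns a dict with a nested 'transcript' dict, the tagged result would look roughly like the sketch below; every key except 'transcript' and 'transcription_source' is an illustrative guess:

video_with_transcript = {
    "video_url": "https://example.com/talk.mp4",  # hypothetical key
    "transcript": {
        "text": "...full transcription...",       # hypothetical key
        "segments": [{"start": 0.0, "end": 4.2, "text": "..."}],  # hypothetical
        "transcription_source": "whisper_tiny.en",  # written by this hunk
    },
}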
@@ -61,7 +63,7 @@
                 **encoded_segments
             }
         elif query:
-            query = [{"text": query, "id": ""}]
+            query = [{"text": query, "id": ""}] if isinstance(query, str) else query
             encoded_segments = self.encode_sentences(query)
 
         return {
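
This one-liner implements the commit title: a bare string is wrapped into the one-element list shape that `encode_sentences` already consumes, while a caller-supplied list passes through untouched. The same normalization in isolation (the list-element shape is inferred from the string branch, since `encode_sentences` is not shown):

def normalize_query(query):
    # Wrap a single string; leave a pre-built list of sentence dicts alone.
    return [{"text": query, "id": ""}] if isinstance(query, str) else query

assert normalize_query("hello") == [{"text": "hello", "id": ""}]
assert normalize_query([{"text": "hi", "id": "1"}]) == [{"text": "hi", "id": "1"}]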
@@ -131,7 +133,7 @@
            batch_details = [
                {
                    **batch_meta[x],
-                    'vectors':batch_vectors[x]
+                    'vectors': batch_vectors[x]
                } for x in range(0, len(batch_meta))
            ]
            all_batches.extend(batch_details)
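
The final hunk is cosmetic (a space after the colon), but the comprehension it touches is the step that pairs each segment's metadata with its embedding. An equivalent `zip` formulation, assuming `batch_meta` and `batch_vectors` are parallel sequences of equal length:

batch_details = [
    {**meta, "vectors": vec}  # merge the metadata dict with its vector
    for meta, vec in zip(batch_meta, batch_vectors)
]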