Spaces:
Sleeping
Sleeping
Update functions.py
Browse files
- functions.py +3 -21
functions.py
CHANGED
@@ -2,11 +2,9 @@ import tiktoken
|
|
2 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
3 |
from langchain_chroma import Chroma
|
4 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
5 |
-
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
6 |
from transformers import pipeline
|
7 |
from app_config import VECTOR_MAX_TOKENS, VECTORS_TOKEN_OVERLAP_SIZE
|
8 |
from langchain.docstore.document import Document
|
9 |
-
import torch
|
10 |
from pytube import YouTube
|
11 |
from dotenv import load_dotenv
|
12 |
from pathlib import Path
|
@@ -44,25 +42,9 @@ def save_audio_file(url):
|
|
44 |
print("Connection Error")
|
45 |
|
46 |
def get_audio_transcription():
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
51 |
-
model_id = "distil-whisper/distil-large-v2"
|
52 |
-
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
53 |
-
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
54 |
-
)
|
55 |
-
model.to(device)
|
56 |
-
processor = AutoProcessor.from_pretrained(model_id)
|
57 |
-
whisper = pipeline(
|
58 |
-
"automatic-speech-recognition",
|
59 |
-
model=model,
|
60 |
-
tokenizer=processor.tokenizer,
|
61 |
-
feature_extractor=processor.feature_extractor,
|
62 |
-
max_new_tokens=128,
|
63 |
-
torch_dtype=torch_dtype,
|
64 |
-
device=device,
|
65 |
-
)
|
66 |
transcription = whisper("yt_audio.mp4",
|
67 |
chunk_length_s=30,
|
68 |
stride_length_s=5,
|
|
|
2 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
3 |
from langchain_chroma import Chroma
|
4 |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
|
|
5 |
from transformers import pipeline
|
6 |
from app_config import VECTOR_MAX_TOKENS, VECTORS_TOKEN_OVERLAP_SIZE
|
7 |
from langchain.docstore.document import Document
|
|
|
8 |
from pytube import YouTube
|
9 |
from dotenv import load_dotenv
|
10 |
from pathlib import Path
|
|
|
42 |
print("Connection Error")
|
43 |
|
44 |
def get_audio_transcription():
|
45 |
+
whisper = pipeline("automatic-speech-recognition",
|
46 |
+
"openai/whisper-large-v3")
|
47 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
transcription = whisper("yt_audio.mp4",
|
49 |
chunk_length_s=30,
|
50 |
stride_length_s=5,
|