Spaces:

akshayballal
/

search_in_audio

Runtime error

App Files Files Community

search_in_audio / app.py

akshayballal

Update app.py

beff119 verified 5 months ago

raw

history blame contribute delete

4.3 kB

	import embed_anything
	from embed_anything import EmbedData
	from tqdm.autonotebook import tqdm
	from pinecone import Pinecone, ServerlessSpec
	import numpy as np
	import os
	from pinecone import PineconeApiException
	import uuid
	import re
	import gradio as gr


	audio_files = ["samples_hp0.wav", "samples_gb0.wav"]

	# One entry per audio file: whatever embed_file returns for that file.
	embeddings: list[list[EmbedData]] = []

	for file in audio_files:
	    embedding = embed_anything.embed_file(file, "Whisper-Jina")
	    embeddings.append(embedding)

	INDEX_NAME = "search-in-audio"

	pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

	# Start every run from an empty index. A missing index (e.g. the very first
	# run) is not an error; any other failure is re-raised unchanged.
	try:
	    pc.delete_index(INDEX_NAME)
	except PineconeApiException as e:
	    if e.status != 404:
	        raise

	try:
	    pc.create_index(
	        name=INDEX_NAME,
	        dimension=768,  # Replace with your model dimensions
	        metric="cosine",  # Replace with your model metric
	        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
	    )
	except PineconeApiException as e:
	    # 409 means the index is already there, which is usable. Anything else
	    # leaves us without an index, so fail here with the real error instead
	    # of printing it and failing later on the first upsert.
	    if e.status != 409:
	        raise
	    print("Index already exists")

	index = pc.Index(INDEX_NAME)

	## Reshape EmbedData items (text, embedding, metadata) into the records Pinecone upserts (id, values, metadata).
	def convert_to_pinecone_format(embeddings: list[list[EmbedData]]):
	    """Flatten per-file embedding lists into Pinecone upsert records.

	    Args:
	        embeddings: One inner list per file; each item exposes ``text``,
	            ``embedding`` and a ``metadata`` mapping with ``start``, ``end``
	            and ``file_name`` keys.

	    Returns:
	        A flat list of dicts with a random UUID4 string ``id``, the vector
	        under ``values``, and a ``metadata`` dict holding ``text``,
	        ``start``, ``end`` and ``file``.
	    """

	    def as_record(chunk):
	        info = chunk.metadata
	        return {
	            "id": str(uuid.uuid4()),
	            "values": chunk.embedding,
	            "metadata": {
	                "text": chunk.text,
	                "start": info["start"],
	                "end": info["end"],
	                # NOTE(review): this pattern matches the literal sequence
	                # "/|\" rather than "/ or \", so ordinary paths come
	                # through unsplit. search() returns this value to its audio
	                # outputs, so check that caller before switching to a bare
	                # file name.
	                "file": re.split(r"/\|\\", info["file_name"])[-1],
	            },
	        }

	    return [as_record(chunk) for per_file in embeddings for chunk in per_file]


	# Push the startup embeddings to the index in a single upsert call.
	data = convert_to_pinecone_format(embeddings)
	index.upsert(data)


	# Audio already in the index. Derived from audio_files (a fresh list, since
	# search() appends to it) so the two cannot drift apart.
	files = list(audio_files)


	def search(query, audio):
	    """Find the indexed audio segments that best match a text query.

	    When ``audio`` names a file that is not yet listed in ``files``, the
	    file is embedded and upserted into the index before querying, and its
	    path is added to ``files``.

	    Args:
	        query: Text to search for.
	        audio: Path of an audio file, or ``None`` when no file was supplied.

	    Returns:
	        A 6-tuple ``(text, file, text, file, text, file)`` covering the top
	        three matches: a Markdown description of the segment followed by the
	        ``file`` value from its metadata. Slots without a match hold ``""``
	        and ``None`` so every output component still receives a value.
	    """
	    query_embedding = embed_anything.embed_query([query], "Jina")[0]

	    if audio is not None:
	        name = re.split(r"[/\\]", audio)[-1]
	        # A file counts as indexed if its bare name is listed (startup
	        # files) or its exact path is (earlier uploads).
	        if name not in files and audio not in files:
	            print(files, name)
	            new_embeddings = embed_anything.embed_file(audio, "Whisper-Jina")
	            index.upsert(convert_to_pinecone_format([new_embeddings]))
	            files.append(audio)

	    response = index.query(
	        vector=query_embedding.embedding,
	        top_k=5,
	        include_metadata=True,
	    )
	    hits = [match.metadata for match in response.matches][:3]

	    outputs = []
	    for hit in hits:
	        display_text = f"""

	`File: {hit['file']}`

	`Start: {hit['start']}`

	`End: {hit['end']}`

	Text: {hit['text']}"""
	        outputs += [display_text, hit["file"]]

	    # Fewer than three matches: pad instead of raising IndexError.
	    outputs += ["", None] * (3 - len(hits))
	    return tuple(outputs)


	# Gradio UI: a text query plus an optional audio file in, three
	# (description, audio) result pairs out.
	demo = gr.Interface(
	    fn=search,
	    title="Search 🔎 in Audio 🎙️",
	    description="""

	<img width=250 src = "https://res.cloudinary.com/dltwftrgc/image/upload/v1712504276/Projects/EmbedAnything_500_x_200_px_a4l8xu.png">

	# Search within audio files using text queries.

	## Models used:

	- Audio Decoder: [openai/whisper-tiny.en](https://huggingface.co/openai/whisper-tiny.en)
	- Embedding Model: [Jina Embeddings v2 base-en](https://huggingface.co/jinaai/jina-embeddings-v2-base-en)

	## Vector Database used: Pinecone

	## Powered by [EmbedAnything by Starlight](https://github.com/StarlightSearch/EmbedAnything) 🚀


	""",
	    article="Created by [Akshay Ballal](https://www.akshaymakes.com)",
	    inputs=["text", gr.Audio(label="Audio", type="filepath")],
	    # Three Markdown/Audio pairs, matching the six values search() returns.
	    outputs=[
	        component
	        for _ in range(3)
	        for component in (
	            gr.Markdown(label="Text"),
	            gr.Audio(label="Audio", type="filepath"),
	        )
	    ],
	    examples=[
	        ["screwdriver", "samples_hp0.wav"],
	        ["united states", "samples_gb0.wav"],
	        ["united states", "samples_hp0.wav"],
	    ],
	)
	demo.launch()