Animalvox / app.py
femiari's picture
Rename main.py to app.py
a10b308 verified
raw
history blame
8 kB
# Import necessary libraries for the project
from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
import torch
import os
import numpy as np
import faiss
import pandas as pd
import matplotlib.pyplot as plt
import time
import torchaudio
import gc
import math
import gradio as gr
import sys
# Load the model used for audio vectorization
bundle = torchaudio.pipelines.HUBERT_BASE
model = bundle.get_model()
# Variable containing the path to the animals.index file on your computer
index_path = "./animals.index"
# Read the animals.index file
index = faiss.read_index(index_path)
# Variable containing the path to the noms_animaux.txt file
chemin_noms_animaux = './noms_animaux.txt'
# Process the noms_animaux.txt file to link vectors in animals.index to names in noms_animaux.txt
# Read the file content and convert it to a list
with open(chemin_noms_animaux, 'r') as fichier:
# Use a list comprehension to process each line
names = [line.strip().strip("'").strip(",").strip() for line in fichier.readlines()]
def bayes_theorem(df, n_top_vectors=50):
"""
Calculate posterior probabilities using Bayes' theorem.
This function limits the DataFrame to the top n vectors, calculates the sum of similarities
for each category, and computes the posterior probabilities normalized by the total probability.
Parameters:
df (pd.DataFrame): DataFrame containing similarity percentages and categories.
n_top_vectors (int): Number of top vectors to consider.
Returns:
dict: Normalized posterior probabilities for each category.
"""
# Limit the DataFrame to the top n vectors
df_limited = df.head(n_top_vectors)
# Get unique categories and initialize the posterior probabilities dictionary
categories = df_limited['names_normalized'].unique()
probas_a_posteriori = {categorie: 0 for categorie in categories}
# Calculate uniform prior probabilities
probas_a_priori = 1/3
# Sum similarities for each category limited to the top n vectors
for categorie in categories:
somme_similarites = df_limited[df_limited['names_normalized'] == categorie]['percentage'].sum()
probas_a_posteriori[categorie] = somme_similarites * probas_a_priori
# Normalize the posterior probabilities
total_proba = sum(probas_a_posteriori.values())
probas_a_posteriori_normalisees = {categorie: (proba / total_proba) for categorie, proba in probas_a_posteriori.items()}
return probas_a_posteriori_normalisees
def get_name_from_index(index):
"""
Get the animal name corresponding to a given vector index.
Parameters:
index (int): Index of the vector.
Returns:
str: Name of the animal.
"""
return names[index]
def name_normalisation(name):
"""
Normalize animal names.
This function normalizes the names of animals by categorizing them into common types.
Parameters:
name (str): Name of the animal.
Returns:
str: Normalized animal name.
"""
if 'dog' in name:
return "Chien"
elif 'cat' in name:
return "Chat"
elif 'bird' in name:
return "Oiseau"
else:
return "Animal non reconnu"
def exp_negative(x):
"""
Define the negative exponential function.
This function applies the negative exponential transformation to a given value.
Parameters:
x (float): Input value.
Returns:
float: Transformed value.
"""
return math.exp(-x)
def normalization(embeddings):
"""
Normalize vectors.
This function normalizes either a single vector (1D) or a matrix of vectors (2D).
If the input is 1D, it normalizes the single vector; if 2D, it normalizes each row.
Parameters:
embeddings (np.ndarray): Input vector or matrix of vectors.
Returns:
np.ndarray: Normalized vector or matrix of vectors.
"""
# Check if embeddings is a single vector (1D) or a matrix (2D)
if embeddings.ndim == 1:
# Normalize a single vector
norm = np.linalg.norm(embeddings)
if norm == 0:
return embeddings
return embeddings / norm
else:
# Normalize each row of a matrix
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
return embeddings / norms
def get_audio_embedding(audio_path):
"""
Get the audio embedding for a given audio file.
This function loads the audio file, processes it to obtain the emission,
flattens and averages the features, normalizes them, and returns the normalized 2D array.
Parameters:
audio_path (str): Path to the audio file.
Returns:
np.ndarray: Normalized 2D array of audio embedding.
"""
waveform1, sample_rate1 = torchaudio.load(audio_path)
waveform1 = torchaudio.functional.resample(waveform1, sample_rate1, bundle.sample_rate)
with torch.inference_mode():
emission1, _ = model(waveform1)
# Flatten the first two dimensions and keep the third
flattened_features1 = emission1.view(-1, emission1.size(2))
mean_features1 = flattened_features1.mean(dim=0)
mean1_array = mean_features1.cpu().numpy().astype(np.float32)
mean1_normal = normalization(mean1_array)
mean1_normal_2d = mean1_normal[np.newaxis, :]
return mean1_normal_2d
def searchinIndex(index, normal_embedding):
"""
Search for the closest audio vectors in the animals.index file.
This function searches the FAISS index for the most similar vectors to the given input embedding.
Parameters:
index (faiss.Index): The FAISS index to search.
normal_embedding (np.ndarray): The normalized embedding to search for.
Returns:
pd.DataFrame: DataFrame containing distances and indices of the closest vectors.
"""
D, I = index.search(normal_embedding, index.ntotal)
r = pd.DataFrame({'distance': D[0], 'index': I[0]})
return r
def animal_classification(audio_path):
"""
Classify the species of animals from an audio file.
This function extracts the audio embedding, searches the index, calculates similarity percentages,
normalizes the names, and applies Bayes' theorem to determine the most likely animal.
Parameters:
audio_path (str): Path to the audio file.
Returns:
str: Formatted result with animal classifications and their probabilities.
"""
query_audio = get_audio_embedding(audio_path) # Get the audio embedding
results = searchinIndex(index, query_audio) # Search the index
results['percentage'] = results['distance'].apply(exp_negative) * 100 # Calculate the percentage
results['names'] = results['index'].apply(get_name_from_index) # Get names from the index
results['names_normalized'] = results['names'].apply(name_normalisation) # Normalize the names
resultat = bayes_theorem(results, 25)
formatted_result = '\n'.join([f"{animal}: {percentage:.2%}" for animal, percentage in resultat.items()])
return formatted_result
def add_in_index(audio_path):
"""
Add a new audio to the index for better classification.
This function extracts the audio embedding from a new audio file, adds it to the FAISS index,
updates the index file, and appends the name to the names list.
Parameters:
audio_path (str): Path to the audio file to be added.
Returns:
str: Confirmation message indicating the addition was successful.
"""
new_audio = get_audio_embedding(audio_path)
index.add(new_audio)
faiss.write_index(index, index_path)
file_name = os.path.basename(audio_path)
names.append(file_name)
result = "L'ajout a bien effectué"
with open(chemin_noms_animaux, 'w') as fichier:
# Write each name to the file, formatted as a Python list element
for nom in names:
fichier.write(f"'{nom}',\n")
return result
# Create the graphical interface
interface = gr.Interface(fn=animal_classification, inputs="file", outputs="text")
# Launch the interface
interface.launch()