import gc
import math
import os
import sys
import time

import faiss
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchaudio
from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC


# Pretrained HuBERT pipeline from torchaudio; `model` is called on waveforms
# in get_audio_embedding() and `bundle.sample_rate` is the resampling target.
bundle = torchaudio.pipelines.HUBERT_BASE
model = bundle.get_model()

# FAISS index of reference audio vectors, loaded once at import time.
index_path = "./animals.index"
index = faiss.read_index(index_path)

# Text file of clip names; names[i] is looked up for index position i
# (see get_name_from_index).
chemin_noms_animaux = './noms_animaux.txt'

with open(chemin_noms_animaux, 'r') as fichier:
    # Lines are stored as `'<name>',` (that is what add_in_index writes).
    # The trailing comma has to be removed *before* the quotes: stripping the
    # quotes first leaves the closing quote hidden behind the comma, so every
    # name would keep a trailing apostrophe.
    names = [
        line.strip().rstrip(",").strip().strip("'").strip()
        for line in fichier
    ]


def bayes_theorem(df, n_top_vectors=50):
    """
    Turn the similarities of the closest vectors into per-category probabilities.

    Only the first ``n_top_vectors`` rows of ``df`` are used. The 'percentage'
    values are summed per 'names_normalized' category and each sum is divided
    by the grand total. The prior is uniform over the categories, so it
    cancels out in the normalization and is not multiplied in explicitly.

    Parameters:
        df (pd.DataFrame): Must contain the columns 'percentage' and
            'names_normalized'. Rows are taken in their existing order.
        n_top_vectors (int): Number of leading rows to consider.

    Returns:
        dict: Category -> probability, keyed in order of first appearance.
            Empty when no rows are available. When the total similarity is
            not positive (all zeros or NaN) every category gets the same
            share instead of NaN.
    """
    df_limited = df.head(n_top_vectors)

    # sort=False keeps the categories in order of first appearance, which is
    # the order the caller prints them in.
    sommes = df_limited.groupby('names_normalized', sort=False)['percentage'].sum()
    if sommes.empty:
        return {}

    total = float(sommes.sum())
    if not total > 0:
        # No usable evidence: fall back to the uniform prior.
        uniforme = 1.0 / len(sommes)
        return {categorie: uniforme for categorie in sommes.index}

    return {categorie: float(somme) / total for categorie, somme in sommes.items()}


def get_name_from_index(index):
    """
    Get the clip name stored for a given vector position.

    Parameters:
        index (int): Position of the vector, as returned by the index search.

    Returns:
        str: The entry of the module-level ``names`` list at that position,
            or an empty string for a negative position.

    Raises:
        IndexError: If the position is beyond the end of ``names``.
    """
    # FAISS reports "no neighbour found" with the label -1. Plain list
    # indexing would silently map that to the *last* name, so negative
    # positions are answered with an empty name instead.
    if index < 0:
        return ""
    return names[index]


def name_normalisation(name):
    """
    Map a clip name to one of the known animal categories.

    The name is searched, ignoring case, for the keywords 'dog', 'cat' and
    'bird' in that order; the first keyword found decides the category.

    Parameters:
        name (str): Name of the clip (typically a file name).

    Returns:
        str: "Chien", "Chat" or "Oiseau", or "Animal non reconnu" when no
            keyword is present.
    """
    # Lower-casing makes names such as "Dog_01.WAV" match as well.
    lowered = name.lower()
    for keyword, label in (("dog", "Chien"), ("cat", "Chat"), ("bird", "Oiseau")):
        if keyword in lowered:
            return label
    return "Animal non reconnu"


def exp_negative(x):
    """
    Compute the negative exponential of a value.

    Parameters:
        x (float): Input value.

    Returns:
        float: ``exp(-x)``.
    """
    return math.exp(-x)


def normalization(embeddings):
    """
    Scale vectors to unit Euclidean length.

    A 1D input is treated as a single vector; any other input is normalized
    row by row (norm taken along axis 1). Vectors whose norm is zero are
    returned unchanged in both cases instead of being divided by zero.

    Parameters:
        embeddings (np.ndarray): Input vector or matrix of vectors.

    Returns:
        np.ndarray: Normalized vector or matrix of vectors.
    """
    if embeddings.ndim == 1:
        norm = np.linalg.norm(embeddings)
        if norm == 0:
            return embeddings
        return embeddings / norm

    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Dividing an all-zero row by 1 leaves it at zero rather than producing
    # NaN, matching the zero-norm guard of the 1D branch. `norms` is a fresh
    # array, so the in-place edit does not touch the caller's data.
    norms[norms == 0] = 1
    return embeddings / norms


def get_audio_embedding(audio_path):
    """
    Compute a unit-length embedding for an audio file.

    The file is loaded with torchaudio, resampled to the sample rate of the
    module-level ``bundle`` and passed through the module-level ``model``.
    The resulting features are averaged into one vector, normalized with
    ``normalization`` and returned with a leading axis of length 1.

    Parameters:
        audio_path (str): Path to the audio file.

    Returns:
        np.ndarray: float32 array of shape (1, feature_dim).
    """
    waveform, sample_rate = torchaudio.load(audio_path)
    waveform = torchaudio.functional.resample(waveform, sample_rate, bundle.sample_rate)

    with torch.inference_mode():
        emission, _ = model(waveform)

    # Collapse every leading axis so each row is one feature vector of length
    # emission.size(2), then average all rows.
    # NOTE(review): presumably emission is (channels, frames, features), i.e.
    # each audio channel is fed to the model as a separate batch item and all
    # channels are averaged together - confirm this is intended for stereo.
    # reshape() is used instead of view() so a non-contiguous tensor cannot
    # make this step fail.
    frame_features = emission.reshape(-1, emission.size(2))
    mean_features = frame_features.mean(dim=0)

    embedding = mean_features.cpu().numpy().astype(np.float32)
    return normalization(embedding)[np.newaxis, :]


def searchinIndex(index, normal_embedding, k=None):
    """
    Search an index for the vectors closest to a query embedding.

    Parameters:
        index (faiss.Index): The index to search. Only its ``ntotal``
            attribute and ``search(query, k)`` method are used.
        normal_embedding (np.ndarray): The query embedding, either 1D or with
            the query in its first row. It is converted to a contiguous
            float32 2D array before searching.
        k (int | None): Number of neighbours to return. Defaults to every
            vector in the index; larger values are capped at ``index.ntotal``.

    Returns:
        pd.DataFrame: Columns 'distance' and 'index' for the first query row,
            in the order returned by the index. Empty when the index holds
            no vectors or ``k`` is not positive.
    """
    total = index.ntotal
    if k is None or k > total:
        k = total
    if k <= 0:
        # Searching with k == 0 is rejected by FAISS; an empty index simply
        # has no neighbours to report.
        return pd.DataFrame({
            'distance': np.empty(0, dtype=np.float32),
            'index': np.empty(0, dtype=np.int64),
        })

    query = np.atleast_2d(np.ascontiguousarray(normal_embedding, dtype=np.float32))
    distances, positions = index.search(query, k)
    return pd.DataFrame({'distance': distances[0], 'index': positions[0]})


def animal_classification(audio_path, n_top_vectors=25):
    """
    Classify the animal heard in an audio file.

    The audio embedding is searched in the module-level FAISS ``index``; each
    hit's distance is converted to a similarity with ``exp(-distance) * 100``,
    its name is mapped to a category, and the closest ``n_top_vectors`` hits
    are turned into per-category probabilities by ``bayes_theorem``.

    Parameters:
        audio_path (str | os.PathLike | file object | None): Path to the audio
            file. An uploaded-file object exposing the path as ``.name`` is
            accepted as well.
        n_top_vectors (int): Number of closest hits used for the vote.

    Returns:
        str: One "<category>: <probability>" line per category, or a short
            message when no file was provided.
    """
    if audio_path is None:
        return "Aucun fichier audio fourni."
    # Older Gradio versions pass a temporary-file wrapper rather than a path.
    # Paths are excluded explicitly because Path.name is only the base name.
    if not isinstance(audio_path, (str, os.PathLike)):
        audio_path = getattr(audio_path, "name", audio_path)

    query_audio = get_audio_embedding(audio_path)
    results = searchinIndex(index, query_audio)

    # NOTE(review): exp(-distance) assumes smaller values mean closer vectors
    # (e.g. an L2 index) - confirm against how animals.index was built.
    results['percentage'] = results['distance'].apply(exp_negative) * 100
    results['names'] = results['index'].apply(get_name_from_index)
    results['names_normalized'] = results['names'].apply(name_normalisation)

    resultat = bayes_theorem(results, n_top_vectors)
    return '\n'.join(
        f"{animal}: {percentage:.2%}" for animal, percentage in resultat.items()
    )


def add_in_index(audio_path):
    """
    Add a new audio clip to the index and to the names list.

    The clip's embedding is appended to the module-level FAISS ``index`` and
    its base file name to ``names``; the index file and the names file are
    then both rewritten on disk.

    Parameters:
        audio_path (str | os.PathLike | file object): Path to the audio file
            to add. An uploaded-file object exposing the path as ``.name`` is
            accepted as well.

    Returns:
        str: Confirmation message.
    """
    if not isinstance(audio_path, (str, os.PathLike)):
        audio_path = getattr(audio_path, "name", audio_path)

    # Do everything that can fail on bad input before touching shared state.
    new_audio = get_audio_embedding(audio_path)
    file_name = os.path.basename(audio_path)

    # Update both in-memory structures together so that position i of the
    # index always corresponds to names[i], even if a later write fails.
    index.add(new_audio)
    names.append(file_name)

    faiss.write_index(index, index_path)
    with open(chemin_noms_animaux, 'w') as fichier:
        # Same `'<name>',` line format that the loader at module level parses.
        fichier.writelines(f"'{nom}',\n" for nom in names)

    return "L'ajout a bien effectué"


# Gradio UI: one file upload in, the classification text out.
interface = gr.Interface(fn=animal_classification, inputs="file", outputs="text")

# Start the web server only when the file is executed as a script, so that
# importing the module (e.g. to reuse the functions) does not block on it.
if __name__ == "__main__":
    interface.launch()