rmayormartins
committed on
Commit
•
5ffd823
1
Parent(s):
5e37164
Subindo arquivos371
Browse files
app.py
CHANGED
@@ -1,32 +1,26 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import numpy as np
|
|
|
4 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
|
5 |
from safetensors.torch import load_file
|
6 |
|
7 |
-
#
|
8 |
-
model_name =
|
9 |
processor = Wav2Vec2Processor.from_pretrained(model_name)
|
10 |
|
11 |
# Carregar o modelo do arquivo safetensors
|
12 |
-
state_dict = load_file("
|
13 |
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name, state_dict=state_dict)
|
14 |
|
15 |
def classify_accent(audio):
|
16 |
if audio is None:
|
17 |
return "Error: No se recibió audio"
|
18 |
|
19 |
-
# Verificar o tipo de entrada de áudio
|
20 |
-
print(f"Tipo de entrada de audio: {type(audio)}")
|
21 |
-
|
22 |
-
# Verificar a estrutura do áudio
|
23 |
-
print(f"Entrada de audio recibida: {audio}")
|
24 |
-
|
25 |
try:
|
26 |
-
#
|
27 |
-
if isinstance(audio,
|
28 |
-
audio_array =
|
29 |
-
sample_rate = audio[1] # A taxa de amostragem no segundo elemento da tupla
|
30 |
else:
|
31 |
raise ValueError("Formato de áudio inesperado.")
|
32 |
|
@@ -37,7 +31,6 @@ def classify_accent(audio):
|
|
37 |
|
38 |
# Resample para 16kHz, se necessário
|
39 |
if sample_rate != 16000:
|
40 |
-
import librosa
|
41 |
audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16000)
|
42 |
|
43 |
input_values = processor(audio_array, return_tensors="pt", sampling_rate=16000).input_values
|
@@ -63,10 +56,10 @@ description_html = """
|
|
63 |
# Interface do Gradio
|
64 |
interface = gr.Interface(
|
65 |
fn=classify_accent,
|
66 |
-
inputs=gr.Audio(type="
|
67 |
outputs="label",
|
68 |
title="Clasificador de Sotaques (Español vs Otro)",
|
69 |
description=description_html
|
70 |
)
|
71 |
|
72 |
-
interface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
+
import librosa
|
5 |
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
|
6 |
from safetensors.torch import load_file
|
7 |
|
8 |
+
# Caminho para o modelo e processador
|
9 |
+
model_name = 'results'
|
10 |
processor = Wav2Vec2Processor.from_pretrained(model_name)
|
11 |
|
12 |
# Carregar o modelo do arquivo safetensors
|
13 |
+
state_dict = load_file(f"{model_name}/model.safetensors")
|
14 |
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name, state_dict=state_dict)
|
15 |
|
16 |
def classify_accent(audio):
|
17 |
if audio is None:
|
18 |
return "Error: No se recibió audio"
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
try:
|
21 |
+
# Verificar se o áudio é um caminho de arquivo
|
22 |
+
if isinstance(audio, str):
|
23 |
+
audio_array, sample_rate = librosa.load(audio, sr=None)
|
|
|
24 |
else:
|
25 |
raise ValueError("Formato de áudio inesperado.")
|
26 |
|
|
|
31 |
|
32 |
# Resample para 16kHz, se necessário
|
33 |
if sample_rate != 16000:
|
|
|
34 |
audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=16000)
|
35 |
|
36 |
input_values = processor(audio_array, return_tensors="pt", sampling_rate=16000).input_values
|
|
|
56 |
# Interface do Gradio
|
57 |
interface = gr.Interface(
|
58 |
fn=classify_accent,
|
59 |
+
inputs=gr.Audio(type="filepath"),
|
60 |
outputs="label",
|
61 |
title="Clasificador de Sotaques (Español vs Otro)",
|
62 |
description=description_html
|
63 |
)
|
64 |
|
65 |
+
interface.launch(debug=True)
|