Spaces:
Runtime error
Runtime error
import torch | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer | |
import soundfile as sf | |
import librosa | |
import numpy as np | |
from flask import Flask, request, jsonify | |
import gradio as gr | |
# Flask application object for the HTTP side of the service.
app = Flask(import_name=__name__)

# Pre-trained Wav2Vec2 CTC checkpoint from the Hugging Face Hub. The tokenizer
# and the model are loaded once at import time and shared by every request.
model_name = "facebook/wav2vec2-large-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
def load_audio(file_path):
    """Load the audio file at *file_path* with librosa, requesting 16 kHz.

    Only the waveform is returned; the sample rate that ``librosa.load``
    reports alongside it is discarded.
    """
    waveform = librosa.load(file_path, sr=16000)[0]
    return waveform
def clone_voice(audio):
    """Run the Wav2Vec2 model on *audio* and write the result to a WAV file.

    The voice-conversion step is still a placeholder: the input waveform is
    written back unchanged to ``song_output/output.wav`` at 16 kHz.

    Args:
        audio: Waveform samples accepted by the module-level ``tokenizer``
            (presumably a 1-D float array sampled at 16 kHz, as produced by
            ``load_audio`` -- confirm against callers).

    Returns:
        The path of the WAV file that was written.
    """
    import os

    input_values = tokenizer(audio, return_tensors="pt").input_values

    # Inference only: skip autograd bookkeeping so no gradient graph is kept.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # NOTE(review): the transcription is computed but not used by the
    # placeholder conversion below.
    transcription = tokenizer.decode(predicted_ids[0])

    # Placeholder for voice conversion logic
    converted_audio = np.array(audio)  # Replace with actual conversion logic

    output_path = "song_output/output.wav"
    # sf.write does not create missing parent directories, so make sure the
    # output folder exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    sf.write(output_path, converted_audio, 16000)
    return output_path
def clone_voice_endpoint():
    """Handle an uploaded audio file and return the path of the converted file.

    Expects a multipart upload under the form field ``file``. The upload is
    saved to a temporary file, loaded with ``load_audio``, passed through
    ``clone_voice``, and the temporary file is removed afterwards.

    Returns:
        A ``(response, status)`` tuple: ``{"output_path": ...}`` with 200 on
        success, or ``{"error": "No file provided"}`` with 400 when the
        request carries no usable file.

    NOTE(review): nothing in the visible source registers this function on
    ``app`` (there is no ``@app.route``), so it is not reachable over HTTP
    as written -- confirm the intended URL and register it.
    """
    import os
    import tempfile

    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    upload = request.files['file']
    # A form submitted without selecting a file yields an empty filename.
    if not upload.filename:
        return jsonify({"error": "No file provided"}), 400

    # Use a unique temporary file instead of a fixed "input.wav" so requests
    # do not overwrite each other's uploads and nothing is left behind.
    fd, file_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        upload.save(file_path)
        audio = load_audio(file_path)
    finally:
        os.remove(file_path)

    output_path = clone_voice(audio)
    return jsonify({"output_path": output_path}), 200
def main_interface(audio):
    """Gradio callback: normalise the uploaded audio and run ``clone_voice``.

    Args:
        audio: The value Gradio passes for an ``Audio(type="numpy")`` input,
            i.e. a ``(sample_rate, samples)`` tuple, or ``None`` when nothing
            was uploaded. A bare waveform array is also accepted and is
            forwarded as-is (assumed to already be 16 kHz mono).

    Returns:
        The output file path returned by ``clone_voice``, or ``None`` when no
        audio was provided.
    """
    if audio is None:
        return None

    # Backward compatibility: a caller may pass an already-prepared waveform.
    if not isinstance(audio, tuple):
        return clone_voice(audio)

    target_rate = 16000  # sample rate clone_voice writes its output with
    sample_rate, samples = audio
    samples = np.asarray(samples)

    # Integer PCM (e.g. int16) is rescaled to floats in [-1, 1].
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)

    # Multi-channel input arrives as (samples, channels); mix down to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    if sample_rate != target_rate:
        samples = librosa.resample(
            samples, orig_sr=sample_rate, target_sr=target_rate
        )

    return clone_voice(samples)
# Gradio UI: one audio input handed to main_interface, and one audio output
# that plays the file whose path main_interface returns.
#
# - The output type is "filepath" because "file" is not an accepted Audio
#   type in current Gradio releases.
# - The `source="upload"` keyword is omitted: "upload" is already the default
#   in Gradio 3.x, and Gradio 4.x renamed the parameter to `sources`, so
#   passing `source` there raises a TypeError.
iface = gr.Interface(
    fn=main_interface,
    inputs=gr.Audio(type="numpy"),
    outputs=gr.Audio(type="filepath"),
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=5000)