Seqath committed on
Commit
bc8665f
1 Parent(s): 18d4cd1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
3
+ import soundfile as sf
4
+ import librosa
5
+ import numpy as np
6
+ from flask import Flask, request, jsonify
7
+ import gradio as gr
8
+
9
# Flask application object; the /clone-voice route is registered on it.
app = Flask(__name__)

# Pre-trained Wav2Vec2 CTC checkpoint on the Hugging Face Hub. The tokenizer
# and the model are both loaded once, at import time, and reused afterwards.
model_name = "facebook/wav2vec2-large-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
15
+
16
def load_audio(file_path):
    """Decode an audio file and return its samples resampled to 16 kHz.

    Args:
        file_path: Path of the audio file, handed to ``librosa.load``.

    Returns:
        The waveform array produced by ``librosa.load``; the sample rate it
        also returns is dropped because it is always the requested 16000.
    """
    # librosa.load yields (samples, sample_rate); only the samples are needed.
    return librosa.load(file_path, sr=16000)[0]
19
+
20
def clone_voice(audio):
    """Run the Wav2Vec2 model on ``audio`` and write the result to disk.

    The voice-conversion step is still a placeholder: the samples that are
    written out are an unchanged copy of the input.

    Args:
        audio: Waveform samples. They are written back out at 16 kHz, so the
            input is presumably expected at that rate — confirm with callers.

    Returns:
        The path of the WAV file that was written
        (``"song_output/output.wav"``).
    """
    import os

    # Inference only: disabling autograd avoids building a graph and keeps
    # memory use down for long recordings.
    with torch.no_grad():
        input_values = tokenizer(audio, return_tensors="pt").input_values
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    # Not used yet; kept for the future conversion logic.
    transcription = tokenizer.decode(predicted_ids[0])

    # Placeholder for voice conversion logic
    converted_audio = np.array(audio)  # Replace with actual conversion logic

    output_path = "song_output/output.wav"
    # soundfile does not create missing directories, so make sure the target
    # folder exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    sf.write(output_path, converted_audio, 16000)
    return output_path
32
+
33
@app.route('/clone-voice', methods=['POST'])
def clone_voice_endpoint():
    """Handle ``POST /clone-voice``: convert an uploaded audio file.

    Expects a multipart upload under the form field ``file``.

    Returns:
        A ``(response, status)`` pair: ``{"output_path": ...}`` with 200 on
        success, or ``{"error": "No file provided"}`` with 400 when the field
        is missing or no file was selected.
    """
    import os
    import tempfile

    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files['file']
    # A form submitted without choosing a file still contains the field, but
    # with an empty filename; reject it instead of failing while decoding.
    if not file.filename:
        return jsonify({"error": "No file provided"}), 400

    # Use a unique temporary file per request so that concurrent uploads do
    # not overwrite each other, and remove it once the samples are in memory.
    fd, file_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        file.save(file_path)
        audio = load_audio(file_path)
    finally:
        os.remove(file_path)

    output_path = clone_voice(audio)

    return jsonify({"output_path": output_path}), 200
46
+
47
def main_interface(audio):
    """Gradio callback: convert the uploaded audio and return the output path.

    The Gradio input component is configured with ``type="numpy"``, which
    delivers the upload as a ``(sample_rate, data)`` tuple. The tuple is
    converted here to the mono, floating-point, 16 kHz waveform that
    ``clone_voice`` writes out at 16 kHz.

    Args:
        audio: ``(sample_rate, data)`` tuple from Gradio, or an already
            prepared waveform array, which is passed through unchanged.

    Returns:
        The path of the WAV file produced by ``clone_voice``.

    Raises:
        ValueError: If no audio was supplied.
    """
    if audio is None:
        raise ValueError("No audio provided")

    if isinstance(audio, tuple):
        sample_rate, samples = audio
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Integer PCM: scale into [-1.0, 1.0] floating point.
            samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            # Multi-channel data is laid out as (samples, channels); downmix.
            samples = samples.mean(axis=1)
        if sample_rate != 16000:
            samples = librosa.resample(
                samples, orig_sr=sample_rate, target_sr=16000
            )
        audio = samples

    return clone_voice(audio)
50
+
51
# Gradio UI: one audio upload in (delivered to the callback as numpy data),
# one audio file out.
iface = gr.Interface(
    fn=main_interface,
    inputs=gr.Audio(source="upload", type="numpy"),
    outputs=gr.Audio(type="file"),
)

if __name__ == "__main__":
    # Serve the Gradio interface on all network interfaces, port 5000.
    # NOTE(review): only the Gradio app is launched here; nothing in the
    # visible code starts the Flask `app`, so the /clone-voice route appears
    # unreachable when the file is run as a script — confirm intent.
    iface.launch(server_name="0.0.0.0", server_port=5000)