Spaces:
Build error
Build error
Update inference.py
Browse files- inference.py +13 -8
inference.py
CHANGED
@@ -27,9 +27,9 @@ def _calculate_f0(input: np.ndarray,length,sr,f0min,f0max,
|
|
27 |
return f0.reshape(-1)
|
28 |
|
29 |
|
30 |
-
def get_text(
|
31 |
|
32 |
-
wav, sr = librosa.load(file,sr=None)
|
33 |
if sr<16000:
|
34 |
return 'sample rate too low'
|
35 |
if len(wav.shape) > 1:
|
@@ -54,15 +54,21 @@ def get_text(file,transform=1.0):
|
|
54 |
def getkey(key):
    """Return ``2 ** (key / 12)`` for the given ``key``.

    The result is computed with ``np.power``, so ``key=0`` yields 1.0 and
    ``key=12`` yields 2.0. Presumably ``key`` is a pitch offset in
    semitones and the result is the matching frequency ratio -- confirm
    against the caller.
    """
    return np.power(2, key / 12.0)
|
56 |
|
57 |
-
def infer(f,
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
x_lengths = [np.size(x,1)]
|
60 |
sid = [speaker]
|
61 |
ort_inputs = {'x':x,'x_lengths':x_lengths,'sid':sid,"noise_scale":[0.667],"length_scale":[1.0],"noise_scale_w":[0.8]}
|
62 |
infersession = onnxruntime.InferenceSession("onnxmodel334.onnx")#,providers=['CUDAExecutionProvider'])
|
63 |
ort_output = infersession.run(['audio'], ort_inputs)
|
64 |
-
sf.write(o,ort_output[0][0][0],22050,'PCM_16',format='wav')
|
65 |
-
o.seek(0,0)
|
66 |
genf0=np.array([])
|
67 |
if reqf0:
|
68 |
wav, sr = librosa.load(o,sr=None)
|
@@ -70,5 +76,4 @@ def infer(f,o,speaker,key,reqf0=False):
|
|
70 |
f0min=librosa.note_to_hz('C2'),
|
71 |
f0max=librosa.note_to_hz('C7'))
|
72 |
genf0=resize2d(genf0,x_lengths[0])
|
73 |
-
|
74 |
-
return sourcef0.tolist(),genf0.tolist()
|
|
|
27 |
return f0.reshape(-1)
|
28 |
|
29 |
|
30 |
+
def get_text(wav,sr,transform=1.0):
|
31 |
|
32 |
+
#wav, sr = librosa.load(file,sr=None)
|
33 |
if sr<16000:
|
34 |
return 'sample rate too low'
|
35 |
if len(wav.shape) > 1:
|
|
|
54 |
def getkey(key):
    """Return ``2 ** (key / 12)`` for the given ``key``.

    The result is computed with ``np.power``, so ``key=0`` yields 1.0 and
    ``key=12`` yields 2.0. Presumably ``key`` is a pitch offset in
    semitones and the result is the matching frequency ratio -- confirm
    against the caller.
    """
    return np.power(2, key / 12.0)
|
56 |
|
57 |
+
def infer(f,speaker,key,reqf0=False):
|
58 |
+
if f is None:
|
59 |
+
return "You need to upload an audio", None
|
60 |
+
sr, audio = f
|
61 |
+
duration = audio.shape[0] / sr
|
62 |
+
if duration > 120:
|
63 |
+
return "请上传小于2min的音频", None
|
64 |
+
audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
|
65 |
+
x,sourcef0 = get_text(audio,sr,getkey(key))
|
66 |
x_lengths = [np.size(x,1)]
|
67 |
sid = [speaker]
|
68 |
ort_inputs = {'x':x,'x_lengths':x_lengths,'sid':sid,"noise_scale":[0.667],"length_scale":[1.0],"noise_scale_w":[0.8]}
|
69 |
infersession = onnxruntime.InferenceSession("onnxmodel334.onnx")#,providers=['CUDAExecutionProvider'])
|
70 |
ort_output = infersession.run(['audio'], ort_inputs)
|
71 |
+
#sf.write(o,ort_output[0][0][0],22050,'PCM_16',format='wav')
|
|
|
72 |
genf0=np.array([])
|
73 |
if reqf0:
|
74 |
wav, sr = librosa.load(o,sr=None)
|
|
|
76 |
f0min=librosa.note_to_hz('C2'),
|
77 |
f0max=librosa.note_to_hz('C7'))
|
78 |
genf0=resize2d(genf0,x_lengths[0])
|
79 |
+
return 'success',ort_output[0][0][0]#sourcef0.tolist(),genf0.tolist()
|
|