14-26AA committed on
Commit
eaabb20
1 Parent(s): 5ee7d48

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +13 -8
inference.py CHANGED
@@ -27,9 +27,9 @@ def _calculate_f0(input: np.ndarray,length,sr,f0min,f0max,
27
  return f0.reshape(-1)
28
 
29
 
30
- def get_text(file,transform=1.0):
31
 
32
- wav, sr = librosa.load(file,sr=None)
33
  if sr<16000:
34
  return 'sample rate too low'
35
  if len(wav.shape) > 1:
@@ -54,15 +54,21 @@ def get_text(file,transform=1.0):
54
  def getkey(key):
55
  return np.power(2,key/12.0)
56
 
57
- def infer(f,o,speaker,key,reqf0=False):
58
- x,sourcef0 = get_text(f,getkey(key))
 
 
 
 
 
 
 
59
  x_lengths = [np.size(x,1)]
60
  sid = [speaker]
61
  ort_inputs = {'x':x,'x_lengths':x_lengths,'sid':sid,"noise_scale":[0.667],"length_scale":[1.0],"noise_scale_w":[0.8]}
62
  infersession = onnxruntime.InferenceSession("onnxmodel334.onnx")#,providers=['CUDAExecutionProvider'])
63
  ort_output = infersession.run(['audio'], ort_inputs)
64
- sf.write(o,ort_output[0][0][0],22050,'PCM_16',format='wav')
65
- o.seek(0,0)
66
  genf0=np.array([])
67
  if reqf0:
68
  wav, sr = librosa.load(o,sr=None)
@@ -70,5 +76,4 @@ def infer(f,o,speaker,key,reqf0=False):
70
  f0min=librosa.note_to_hz('C2'),
71
  f0max=librosa.note_to_hz('C7'))
72
  genf0=resize2d(genf0,x_lengths[0])
73
- o.seek(0,0)
74
- return sourcef0.tolist(),genf0.tolist()
 
27
  return f0.reshape(-1)
28
 
29
 
30
+ def get_text(wav,sr,transform=1.0):
31
 
32
+ #wav, sr = librosa.load(file,sr=None)
33
  if sr<16000:
34
  return 'sample rate too low'
35
  if len(wav.shape) > 1:
 
54
  def getkey(key):
55
  return np.power(2,key/12.0)
56
 
57
+ def infer(f,speaker,key,reqf0=False):
58
+ if f is None:
59
+ return "You need to upload an audio", None
60
+ sr, audio = f
61
+ duration = audio.shape[0] / sr
62
+ if duration > 120:
63
+ return "请上传小于2min的音频", None
64
+ audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
65
+ x,sourcef0 = get_text(audio,sr,getkey(key))
66
  x_lengths = [np.size(x,1)]
67
  sid = [speaker]
68
  ort_inputs = {'x':x,'x_lengths':x_lengths,'sid':sid,"noise_scale":[0.667],"length_scale":[1.0],"noise_scale_w":[0.8]}
69
  infersession = onnxruntime.InferenceSession("onnxmodel334.onnx")#,providers=['CUDAExecutionProvider'])
70
  ort_output = infersession.run(['audio'], ort_inputs)
71
+ #sf.write(o,ort_output[0][0][0],22050,'PCM_16',format='wav')
 
72
  genf0=np.array([])
73
  if reqf0:
74
  wav, sr = librosa.load(o,sr=None)
 
76
  f0min=librosa.note_to_hz('C2'),
77
  f0max=librosa.note_to_hz('C7'))
78
  genf0=resize2d(genf0,x_lengths[0])
79
+ return 'success',ort_output[0][0][0]#sourcef0.tolist(),genf0.tolist()