jhtonyKoo committed on
Commit
bb9523a
1 Parent(s): 20161bc

modify fx norm

Browse files
Files changed (2) hide show
  1. app.py +22 -39
  2. inference.py +0 -2
app.py CHANGED
@@ -63,31 +63,34 @@ def process_audio_with_youtube(input_audio, input_youtube_url, reference_audio,
63
 
64
  return process_audio(input_audio, reference_audio)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def process_audio(input_audio, reference_audio):
67
  output_audio, predicted_params, sr, normalized_input = mastering_transfer.process_audio(
68
  input_audio, reference_audio
69
  )
70
 
71
  param_output = mastering_transfer.get_param_output_string(predicted_params)
72
-
73
- # Convert output_audio to numpy array if it's a tensor
74
- if isinstance(output_audio, torch.Tensor):
75
- output_audio = output_audio.cpu().numpy()
76
-
77
- if output_audio.ndim == 1:
78
- output_audio = output_audio.reshape(-1, 1)
79
- elif output_audio.ndim > 2:
80
- output_audio = output_audio.squeeze()
81
-
82
- # Ensure the audio is in the correct shape (samples, channels)
83
- if output_audio.shape[1] > output_audio.shape[0]:
84
- output_audio = output_audio.transpose(1,0)
85
 
 
 
 
86
  # Normalize output audio
87
- output_audio = loudness_normalize(output_audio, sr)
88
  # Denormalize the audio to int16
89
  output_audio = denormalize_audio(output_audio, dtype=np.int16)
90
- normalized_input = denormalize_audio(normalized_input, dtype=np.int16)
91
 
92
  return (sr, output_audio), param_output, (sr, normalized_input)
93
 
@@ -125,18 +128,8 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
125
  current_output = last_result['audio']
126
  ito_param_output = mastering_transfer.get_param_output_string(last_result['params'])
127
 
128
- # Convert current_output to numpy array if it's a tensor
129
- if isinstance(current_output, torch.Tensor):
130
- current_output = current_output.cpu().numpy()
131
-
132
- if current_output.ndim == 1:
133
- current_output = current_output.reshape(-1, 1)
134
- elif current_output.ndim > 2:
135
- current_output = current_output.squeeze()
136
- # Ensure the audio is in the correct shape (samples, channels)
137
- if current_output.shape[1] > current_output.shape[0]:
138
- current_output = current_output.transpose(1,0)
139
-
140
  # Loudness normalize output audio
141
  current_output = loudness_normalize(current_output, args.sample_rate)
142
  # Denormalize the audio to int16
@@ -149,18 +142,8 @@ def update_ito_output(all_results, selected_step):
149
  current_output = selected_result['audio']
150
  ito_param_output = mastering_transfer.get_param_output_string(selected_result['params'])
151
 
152
- # Convert current_output to numpy array if it's a tensor
153
- if isinstance(current_output, torch.Tensor):
154
- current_output = current_output.cpu().numpy()
155
-
156
- if current_output.ndim == 1:
157
- current_output = current_output.reshape(-1, 1)
158
- elif current_output.ndim > 2:
159
- current_output = current_output.squeeze()
160
- # Ensure the audio is in the correct shape (samples, channels)
161
- if current_output.shape[1] > current_output.shape[0]:
162
- current_output = current_output.transpose(1,0)
163
-
164
  # Loudness normalize output audio
165
  current_output = loudness_normalize(current_output, args.sample_rate)
166
  # Denormalize the audio to int16
 
63
 
64
  return process_audio(input_audio, reference_audio)
65
 
66
+ def to_numpy_audio(audio):
67
+ # Convert output_audio to numpy array if it's a tensor
68
+ if isinstance(audio, torch.Tensor):
69
+ audio = audio.cpu().numpy()
70
+ # check dimension
71
+ if audio.ndim == 1:
72
+ audio = audio.reshape(-1, 1)
73
+ elif audio.ndim > 2:
74
+ audio = audio.squeeze()
75
+ # Ensure the audio is in the correct shape (samples, channels)
76
+ if audio.shape[1] > audio.shape[0]:
77
+ audio = audio.transpose(1,0)
78
+ return audio
79
+
80
  def process_audio(input_audio, reference_audio):
81
  output_audio, predicted_params, sr, normalized_input = mastering_transfer.process_audio(
82
  input_audio, reference_audio
83
  )
84
 
85
  param_output = mastering_transfer.get_param_output_string(predicted_params)
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ # Convert to numpy audio
88
+ output_audio = to_numpy_audio(output_audio)
89
+ normalized_input = to_numpy_audio(normalized_input)
90
  # Normalize output audio
91
+ output_audio = loudness_normalize(output_audio, sr)
92
  # Denormalize the audio to int16
93
  output_audio = denormalize_audio(output_audio, dtype=np.int16)
 
94
 
95
  return (sr, output_audio), param_output, (sr, normalized_input)
96
 
 
128
  current_output = last_result['audio']
129
  ito_param_output = mastering_transfer.get_param_output_string(last_result['params'])
130
 
131
+ # Convert to numpy audio
132
+ current_output = to_numpy_audio(current_output)
 
 
 
 
 
 
 
 
 
 
133
  # Loudness normalize output audio
134
  current_output = loudness_normalize(current_output, args.sample_rate)
135
  # Denormalize the audio to int16
 
142
  current_output = selected_result['audio']
143
  ito_param_output = mastering_transfer.get_param_output_string(selected_result['params'])
144
 
145
+ # Convert to numpy audio
146
+ current_output = to_numpy_audio(current_output)
 
 
 
 
 
 
 
 
 
 
147
  # Loudness normalize output audio
148
  current_output = loudness_normalize(current_output, args.sample_rate)
149
  # Denormalize the audio to int16
inference.py CHANGED
@@ -153,8 +153,6 @@ class MasteringStyleTransfer:
153
  def process_audio(self, input_audio, reference_audio):
154
  input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, normalize=True)
155
  reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
156
- print(f"input_tensor: {input_tensor.shape}")
157
- print(f"reference_tensor: {reference_tensor.shape}")
158
 
159
  reference_feature = self.get_reference_embedding(reference_tensor)
160
 
 
153
  def process_audio(self, input_audio, reference_audio):
154
  input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, normalize=True)
155
  reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
 
 
156
 
157
  reference_feature = self.get_reference_embedding(reference_tensor)
158