Spaces:
Running
Running
modify fx norm
Browse files
- app.py +1 -0
- inference.py +1 -5
app.py
CHANGED
@@ -87,6 +87,7 @@ def process_audio(input_audio, reference_audio):
|
|
87 |
output_audio = loudness_normalize(output_audio, sr)
|
88 |
# Denormalize the audio to int16
|
89 |
output_audio = denormalize_audio(output_audio, dtype=np.int16)
|
|
|
90 |
|
91 |
return (sr, output_audio), param_output, (sr, normalized_input)
|
92 |
|
|
|
87 |
output_audio = loudness_normalize(output_audio, sr)
|
88 |
# Denormalize the audio to int16
|
89 |
output_audio = denormalize_audio(output_audio, dtype=np.int16)
|
90 |
+
normalized_input = denormalize_audio(normalized_input, dtype=np.int16)
|
91 |
|
92 |
return (sr, output_audio), param_output, (sr, normalized_input)
|
93 |
|
inference.py
CHANGED
@@ -143,9 +143,7 @@ class MasteringStyleTransfer:
|
|
143 |
|
144 |
# Apply fx normalization for input audio during mastering style transfer
|
145 |
if normalize:
|
146 |
-
print(f"before normalization: {data.shape}")
|
147 |
data = self.fx_normalizer.normalize_audio(data.T, 'mixture').T
|
148 |
-
print(f"after normalization: {data.shape}")
|
149 |
|
150 |
# Convert to torch tensor
|
151 |
data_tensor = torch.FloatTensor(data).unsqueeze(0)
|
@@ -153,11 +151,9 @@ class MasteringStyleTransfer:
|
|
153 |
return data_tensor.to(self.device)
|
154 |
|
155 |
def process_audio(self, input_audio, reference_audio):
|
156 |
-
print(f"input: {input_audio}")
|
157 |
-
print(f"reference: {reference_audio}")
|
158 |
input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, normalize=True)
|
159 |
-
print(f"input_tensor: {input_tensor.shape}")
|
160 |
reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
|
|
|
161 |
print(f"reference_tensor: {reference_tensor.shape}")
|
162 |
|
163 |
reference_feature = self.get_reference_embedding(reference_tensor)
|
|
|
143 |
|
144 |
# Apply fx normalization for input audio during mastering style transfer
|
145 |
if normalize:
|
|
|
146 |
data = self.fx_normalizer.normalize_audio(data.T, 'mixture').T
|
|
|
147 |
|
148 |
# Convert to torch tensor
|
149 |
data_tensor = torch.FloatTensor(data).unsqueeze(0)
|
|
|
151 |
return data_tensor.to(self.device)
|
152 |
|
153 |
def process_audio(self, input_audio, reference_audio):
|
|
|
|
|
154 |
input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, normalize=True)
|
|
|
155 |
reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
|
156 |
+
print(f"input_tensor: {input_tensor.shape}")
|
157 |
print(f"reference_tensor: {reference_tensor.shape}")
|
158 |
|
159 |
reference_feature = self.get_reference_embedding(reference_tensor)
|