jhtonyKoo committed
Commit 6f16821 · 1 Parent(s): 35a672a

modify fx norm

Files changed (1)
  1. inference.py +7 -3
inference.py CHANGED
@@ -112,7 +112,7 @@ class MasteringStyleTransfer:
 
         return all_results, min_loss_step
 
-    def preprocess_audio(self, audio, target_sample_rate=44100, is_input=False):
+    def preprocess_audio(self, audio, target_sample_rate=44100, normalize=False):
         sample_rate, data = audio
 
         # Normalize audio to -1 to 1 range
@@ -141,7 +141,7 @@
         data = julius.resample_frac(torch.from_numpy(data), sample_rate, target_sample_rate).numpy()
 
         # Apply fx normalization for input audio during mastering style transfer
-        if is_input:
+        if normalize:
             data = self.fx_normalizer.normalize_audio(data, 'mixture')
 
         # Convert to torch tensor
@@ -150,8 +150,12 @@
         return data_tensor.to(self.device)
 
     def process_audio(self, input_audio, reference_audio):
-        input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, is_input=True)
+        print(f"input: {input_audio}")
+        print(f"reference: {reference_audio}")
+        input_tensor = self.preprocess_audio(input_audio, self.args.sample_rate, normalize=True)
         reference_tensor = self.preprocess_audio(reference_audio, self.args.sample_rate)
+        print(f"input_tensor: {input_tensor.shape}")
+        print(f"reference_tensor: {reference_tensor.shape}")
 
         reference_feature = self.get_reference_embedding(reference_tensor)
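The renamed flag makes the call sites easier to read: only the input audio is passed through fx normalization (the 'mixture' preset), while the reference keeps its original effects so it still defines the target mastering style. The sketch below is a minimal, hypothetical driver showing how preprocess_audio would be called after this commit; the mst instance, the load_audio helper, and the file names are illustrative assumptions and not part of the repository. The (sample_rate, ndarray) tuple format follows the `sample_rate, data = audio` unpacking in preprocess_audio above.

import numpy as np
import soundfile as sf

def load_audio(path):
    # soundfile returns (data, sample_rate); the model expects (sample_rate, data)
    data, sr = sf.read(path)
    return sr, data.astype(np.float32)

# `mst` is assumed to be an already-constructed MasteringStyleTransfer instance
input_audio = load_audio("input.wav")          # hypothetical file names
reference_audio = load_audio("reference.wav")

# Only the input is fx-normalized; the reference is preprocessed without normalization
input_tensor = mst.preprocess_audio(input_audio, mst.args.sample_rate, normalize=True)
reference_tensor = mst.preprocess_audio(reference_audio, mst.args.sample_rate)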