jhtonyKoo committed
Commit e182234
Parent: 71c3a7e

modify app

Files changed (2):
  1. app.py +56 -36
  2. inference.py +2 -2
app.py CHANGED
@@ -16,7 +16,17 @@ def process_audio(input_audio, reference_audio):
 
     param_output = mastering_transfer.get_param_output_string(predicted_params)
 
-    return "output_mastered.wav", param_output
+    # Convert output_audio to numpy array if it's a tensor
+    if isinstance(output_audio, torch.Tensor):
+        output_audio = output_audio.cpu().numpy()
+
+    # Ensure the audio is in the correct shape (samples, channels)
+    if output_audio.ndim == 1:
+        output_audio = output_audio.reshape(-1, 1)
+    elif output_audio.ndim > 2:
+        output_audio = output_audio.squeeze()
+
+    return (sr, output_audio), param_output
 
 def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
     if ito_reference_audio is None:
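The first hunk replaces the hard-coded output filename with Gradio's in-memory audio format: an output gr.Audio component accepts a (sample_rate, numpy_array) tuple, with the array shaped (samples,) or (samples, channels). Two caveats worth noting: sr must already be defined inside process_audio (it is not introduced by this hunk), and a tensor that still tracks gradients would need .detach() before .numpy(). A minimal, self-contained sketch of the same return contract (the tone function below is hypothetical, not part of this repo):

    import numpy as np
    import gradio as gr

    def tone(freq):
        # Return audio the way the updated process_audio does: a
        # (sample_rate, ndarray) tuple, data shaped (samples, channels).
        sr = 44100
        t = np.linspace(0, 1.0, sr, endpoint=False)
        mono = np.sin(2 * np.pi * float(freq) * t).astype(np.float32)
        return (sr, mono.reshape(-1, 1))

    demo = gr.Interface(fn=tone, inputs=gr.Number(value=440), outputs=gr.Audio())

    if __name__ == "__main__":
        demo.launch()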
@@ -36,13 +46,24 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
 
     initial_reference_feature = mastering_transfer.get_reference_embedding(reference_tensor)
 
-    ito_output, ito_params, optimized_embedding, steps_taken, ito_log = mastering_transfer.inference_time_optimization(
+    ito_log = ""
+    for log_entry, current_output, current_params, step in mastering_transfer.inference_time_optimization(
         input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
-    )
-
-    ito_param_output = mastering_transfer.get_param_output_string(ito_params)
-
-    return "ito_output_mastered.wav", ito_param_output, steps_taken, ito_log
+    ):
+        ito_log += log_entry
+        ito_param_output = mastering_transfer.get_param_output_string(current_params)
+
+        # Convert current_output to numpy array if it's a tensor
+        if isinstance(current_output, torch.Tensor):
+            current_output = current_output.cpu().numpy()
+
+        # Ensure the audio is in the correct shape (samples, channels)
+        if current_output.ndim == 1:
+            current_output = current_output.reshape(-1, 1)
+        elif current_output.ndim > 2:
+            current_output = current_output.squeeze()
+
+        yield (args.sample_rate, current_output), ito_param_output, step, ito_log
 
 with gr.Blocks() as demo:
     gr.Markdown("# Mastering Style Transfer Demo")
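The second hunk rewrites perform_ito as a generator: it consumes the per-step tuples now yielded by inference_time_optimization and re-yields (audio, params, step, log) so the interface can refresh after every optimization step. Note the yield references args.sample_rate, which assumes a module-level args object defined elsewhere in app.py. A minimal sketch of the generator-streaming pattern this depends on, with a hypothetical fake_ito standing in for the real optimization loop:

    import time
    import gradio as gr

    def fake_ito(num_steps):
        # Hypothetical stand-in for perform_ito: a generator handler.
        # Gradio streams each yielded tuple to the bound outputs, so
        # the UI updates once per "optimization step".
        log = ""
        for step in range(int(num_steps)):
            time.sleep(0.1)  # stands in for one optimizer step
            log += f"Step {step + 1}, Loss: {1.0 / (step + 1):.4f}\n"
            yield step + 1, log

    with gr.Blocks() as demo:
        num_steps = gr.Slider(1, 100, value=10, step=1, label="Steps")
        run = gr.Button("Run")
        step_box = gr.Number(label="Step")
        log_box = gr.Textbox(label="Log", lines=10)
        run.click(fake_ito, inputs=num_steps, outputs=[step_box, log_box])

    if __name__ == "__main__":
        demo.launch()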
@@ -64,38 +85,37 @@ with gr.Blocks() as demo:
         outputs=[output_audio, param_output]
     )
 
-    gr.Markdown("## Inference Time Optimization (ITO)")
-
-    with gr.Row():
-        with gr.Column(scale=2):
-            ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
-            num_steps = gr.Slider(minimum=1, maximum=1000, value=100, step=1, label="Number of Steps")
-            optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
-            learning_rate = gr.Slider(minimum=0.0001, maximum=0.1, value=0.001, step=0.0001, label="Learning Rate")
-            af_weights = gr.Textbox(label="AudioFeatureLoss Weights (comma-separated)", value="0.1,0.001,1.0,1.0,0.1")
-
-            ito_button = gr.Button("Perform ITO")
-
-            ito_output_audio = gr.Audio(label="ITO Output Audio")
-            ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
-            ito_steps_taken = gr.Number(label="ITO Steps Taken")
+    gr.Markdown("## Inference Time Optimization (ITO)")
+
+    with gr.Row():
+        with gr.Column(scale=2):
+            ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
+            num_steps = gr.Slider(minimum=1, maximum=1000, value=100, step=1, label="Number of Steps")
+            optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
+            learning_rate = gr.Slider(minimum=0.0001, maximum=0.1, value=0.001, step=0.0001, label="Learning Rate")
+            af_weights = gr.Textbox(label="AudioFeatureLoss Weights (comma-separated)", value="0.1,0.001,1.0,1.0,0.1")
+
+            ito_button = gr.Button("Perform ITO")
 
-        with gr.Column(scale=1):
-            ito_log = gr.Textbox(label="ITO Log", lines=30)
-
-    def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
-        af_weights = [float(w.strip()) for w in af_weights.split(',')]
-        ito_output, ito_params, steps_taken, log = perform_ito(
-            input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
-        )
-        return ito_output, ito_params, steps_taken, log
-
-    ito_button.click(
-        run_ito,
-        inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
-        outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log]
+            ito_output_audio = gr.Audio(label="ITO Output Audio")
+            ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=10)
+            ito_steps_taken = gr.Number(label="ITO Steps Taken")
+
+        with gr.Column(scale=1):
+            ito_log = gr.Textbox(label="ITO Log", lines=30)
+
+    def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
+        af_weights = [float(w.strip()) for w in af_weights.split(',')]
+        return perform_ito(
+            input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
     )
 
+    ito_button.click(
+        run_ito,
+        inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
+        outputs=[ito_output_audio, ito_param_output, ito_steps_taken, ito_log]
+    )
+
 demo.launch()
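In the new wiring, run_ito parses the weight string and returns the perform_ito generator instead of unpacking a single result. One hedged caveat: Gradio usually decides whether to stream by checking whether the handler itself is a generator function, and a plain function that merely returns a generator object may be treated as a single, non-streaming return value. If streaming misbehaves, delegating with yield from is a safe equivalent, as in this sketch:

    def run_ito(input_audio, reference_audio, ito_reference_audio,
                num_steps, optimizer, learning_rate, af_weights):
        # Same parsing as the committed version.
        af_weights = [float(w.strip()) for w in af_weights.split(',')]
        # yield from makes run_ito itself a generator function, which
        # is what Gradio inspects when deciding to stream outputs.
        yield from perform_ito(
            input_audio, reference_audio, ito_reference_audio,
            num_steps, optimizer, learning_rate, af_weights
        )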
inference.py CHANGED
@@ -110,7 +110,7 @@ class MasteringStyleTransfer:
             initial_params = current_params
             top_10_diff = self.get_top_10_diff_string(initial_params, current_params)
             log_entry = f"Step {step + 1}, Loss: {total_loss.item():.4f}\n{top_10_diff}\n"
-            ito_log.append(log_entry)
+            yield log_entry, output_audio, current_params, step + 1
 
             if divergence_counter >= 10:
                 print(f"Optimization stopped early due to divergence at step {step}")
@@ -119,7 +119,7 @@
             total_loss.backward()
             optimizer.step()
 
-        return min_loss_output, min_loss_params, min_loss_embedding, min_loss_step + 1, "\n".join(ito_log)
+        return min_loss_output, min_loss_params, min_loss_embedding, min_loss_step + 1
 
     def preprocess_audio(self, audio, target_sample_rate=44100):
         sample_rate, data = audio
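On the inference.py side, inference_time_optimization becomes a generator that yields a (log_entry, audio, params, step) tuple every step but still returns the minimum-loss result. In Python, a generator's return value is not produced by iteration: it rides on StopIteration.value (or is captured by yield from), so the for loop in app.py silently discards it. A small sketch of how a caller could recover it, using a hypothetical gen:

    def gen():
        # Yields per-step values, then returns a final summary value,
        # mirroring the new inference_time_optimization contract.
        yield "step 1"
        yield "step 2"
        return "min-loss result"

    it = gen()
    while True:
        try:
            print(next(it))        # the per-step yields
        except StopIteration as stop:
            print(stop.value)      # the generator's return value
            break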