ZeyuXie committed on
Commit
101c1cd
1 Parent(s): 9fd4c53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -33,7 +33,7 @@ class InferRunner:
33
  self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
34
 
35
  device = "cuda" if torch.cuda.is_available() else "cpu"
36
- # runner = InferRunner(device)
37
  event_list = get_event()
38
  def infer(caption, num_steps=200, guidance_scale=3.0, audio_len=16000*10):
39
  with torch.no_grad():
@@ -49,7 +49,6 @@ def preprocess(caption):
49
  return output, output
50
 
51
  def update_textbox(event_name, current_text):
52
- print(event_name, current_text)
53
  event = event_name + ' two times.'
54
  if current_text:
55
  return current_text.strip('.') + ' then ' + event
@@ -60,7 +59,7 @@ with gr.Blocks() as demo:
60
  with gr.Row():
61
  gr.Markdown("## PicoAudio")
62
  with gr.Row():
63
- description_text = f"18 events supported :"
64
  gr.Markdown(description_text)
65
 
66
 
@@ -80,20 +79,22 @@ with gr.Blocks() as demo:
80
 
81
 
82
  with gr.Row():
83
- gr.Markdown("## Step1")
84
  with gr.Row():
85
- preprocess_description_text = f"Preprocess: transfer free-text into timestamp caption via LLM. "+\
86
  "This demo uses Gemini as the preprocessor. If any errors occur, please try a few more times. "+\
87
  "We also provide the GPT version consistent with the paper in the file 'Files/llm_reprocessing.py'. You can use your own api_key to modify and run 'Files/inference.py' for local inference."
88
  gr.Markdown(preprocess_description_text)
89
  with gr.Row():
90
  with gr.Column():
91
- freetext_prompt = gr.Textbox(label="Prompt: Input your free-text caption here. (e.g. a dog barks three times.)",
92
  value="a dog barks three times.",)
93
- preprocess_run_button = gr.Button()
 
 
94
  prompt = None
95
  with gr.Column():
96
- freetext_prompt_out = gr.Textbox(label="Preprocess output")
97
  with gr.Row():
98
  with gr.Column():
99
  gr.Examples(
@@ -108,15 +109,17 @@ with gr.Blocks() as demo:
108
 
109
 
110
  with gr.Row():
111
- gr.Markdown("## Step2")
112
  with gr.Row():
113
  generate_description_text = f"Generate audio based on timestamp caption."
114
  gr.Markdown(generate_description_text)
115
  with gr.Row():
116
  with gr.Column():
117
- prompt = gr.Textbox(label="Prompt: Input your caption formatted as 'event1 at onset1-offset1_onset2-offset2 and event2 at onset1-offset1'.",
118
  value="spraying at 0.38-1.176_3.06-3.856 and gunshot at 1.729-3.729_4.367-6.367_7.031-9.031.",)
119
- generate_run_button = gr.Button()
 
 
120
  with gr.Accordion("Advanced options", open=False):
121
  num_steps = gr.Slider(label="num_steps", minimum=1, maximum=300, value=200, step=1)
122
  guidance_scale = gr.Slider(label="guidance_scale", minimum=0.1, maximum=8.0, value=3.0, step=0.1)
 
33
  self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
34
 
35
  device = "cuda" if torch.cuda.is_available() else "cpu"
36
+ runner = InferRunner(device)
37
  event_list = get_event()
38
  def infer(caption, num_steps=200, guidance_scale=3.0, audio_len=16000*10):
39
  with torch.no_grad():
 
49
  return output, output
50
 
51
  def update_textbox(event_name, current_text):
 
52
  event = event_name + ' two times.'
53
  if current_text:
54
  return current_text.strip('.') + ' then ' + event
 
59
  with gr.Row():
60
  gr.Markdown("## PicoAudio")
61
  with gr.Row():
62
+ description_text = f"18 events supported:"
63
  gr.Markdown(description_text)
64
 
65
 
 
79
 
80
 
81
  with gr.Row():
82
+ gr.Markdown("## Step1-Preprocess")
83
  with gr.Row():
84
+ preprocess_description_text = f"Transfer free-text into timestamp caption via LLM. "+\
85
  "This demo uses Gemini as the preprocessor. If any errors occur, please try a few more times. "+\
86
  "We also provide the GPT version consistent with the paper in the file 'Files/llm_reprocessing.py'. You can use your own api_key to modify and run 'Files/inference.py' for local inference."
87
  gr.Markdown(preprocess_description_text)
88
  with gr.Row():
89
  with gr.Column():
90
+ freetext_prompt = gr.Textbox(label="Free-text Prompt: Input your free-text caption here. (e.g. a dog barks three times.)",
91
  value="a dog barks three times.",)
92
+ with gr.Row():
93
+ preprocess_run_button = gr.Button()
94
+ preprocess_run_clear = gr.ClearButton([freetext_prompt])
95
  prompt = None
96
  with gr.Column():
97
+ freetext_prompt_out = gr.Textbox(label="Timestamp Caption: Preprocess output")
98
  with gr.Row():
99
  with gr.Column():
100
  gr.Examples(
 
109
 
110
 
111
  with gr.Row():
112
+ gr.Markdown("## Step2-Generate")
113
  with gr.Row():
114
  generate_description_text = f"Generate audio based on timestamp caption."
115
  gr.Markdown(generate_description_text)
116
  with gr.Row():
117
  with gr.Column():
118
+ prompt = gr.Textbox(label="Timestamp Caption: Specify your timestamp caption formatted as 'event1 at onset1-offset1_onset2-offset2 and event2 at onset1-offset1'.",
119
  value="spraying at 0.38-1.176_3.06-3.856 and gunshot at 1.729-3.729_4.367-6.367_7.031-9.031.",)
120
+ with gr.Row():
121
+ generate_run_button = gr.Button()
122
+ generate_run_clear = gr.ClearButton([prompt])
123
  with gr.Accordion("Advanced options", open=False):
124
  num_steps = gr.Slider(label="num_steps", minimum=1, maximum=300, value=200, step=1)
125
  guidance_scale = gr.Slider(label="guidance_scale", minimum=0.1, maximum=8.0, value=3.0, step=0.1)