Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -33,7 +33,7 @@ class InferRunner:
|
|
33 |
self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
|
34 |
|
35 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
36 |
-
|
37 |
event_list = get_event()
|
38 |
def infer(caption, num_steps=200, guidance_scale=3.0, audio_len=16000*10):
|
39 |
with torch.no_grad():
|
@@ -49,7 +49,6 @@ def preprocess(caption):
|
|
49 |
return output, output
|
50 |
|
51 |
def update_textbox(event_name, current_text):
|
52 |
-
print(event_name, current_text)
|
53 |
event = event_name + ' two times.'
|
54 |
if current_text:
|
55 |
return current_text.strip('.') + ' then ' + event
|
@@ -60,7 +59,7 @@ with gr.Blocks() as demo:
|
|
60 |
with gr.Row():
|
61 |
gr.Markdown("## PicoAudio")
|
62 |
with gr.Row():
|
63 |
-
description_text = f"18 events supported
|
64 |
gr.Markdown(description_text)
|
65 |
|
66 |
|
@@ -80,20 +79,22 @@ with gr.Blocks() as demo:
|
|
80 |
|
81 |
|
82 |
with gr.Row():
|
83 |
-
gr.Markdown("## Step1")
|
84 |
with gr.Row():
|
85 |
-
preprocess_description_text = f"
|
86 |
"This demo uses Gemini as the preprocessor. If any errors occur, please try a few more times. "+\
|
87 |
"We also provide the GPT version consistent with the paper in the file 'Files/llm_reprocessing.py'. You can use your own api_key to modify and run 'Files/inference.py' for local inference."
|
88 |
gr.Markdown(preprocess_description_text)
|
89 |
with gr.Row():
|
90 |
with gr.Column():
|
91 |
-
freetext_prompt = gr.Textbox(label="Prompt: Input your free-text caption here. (e.g. a dog barks three times.)",
|
92 |
value="a dog barks three times.",)
|
93 |
-
|
|
|
|
|
94 |
prompt = None
|
95 |
with gr.Column():
|
96 |
-
freetext_prompt_out = gr.Textbox(label="Preprocess output")
|
97 |
with gr.Row():
|
98 |
with gr.Column():
|
99 |
gr.Examples(
|
@@ -108,15 +109,17 @@ with gr.Blocks() as demo:
|
|
108 |
|
109 |
|
110 |
with gr.Row():
|
111 |
-
gr.Markdown("## Step2")
|
112 |
with gr.Row():
|
113 |
generate_description_text = f"Generate audio based on timestamp caption."
|
114 |
gr.Markdown(generate_description_text)
|
115 |
with gr.Row():
|
116 |
with gr.Column():
|
117 |
-
prompt = gr.Textbox(label="
|
118 |
value="spraying at 0.38-1.176_3.06-3.856 and gunshot at 1.729-3.729_4.367-6.367_7.031-9.031.",)
|
119 |
-
|
|
|
|
|
120 |
with gr.Accordion("Advanced options", open=False):
|
121 |
num_steps = gr.Slider(label="num_steps", minimum=1, maximum=300, value=200, step=1)
|
122 |
guidance_scale = gr.Slider(label="guidance_scale", minimum=0.1, maximum=8.0, value=3.0, step=0.1)
|
|
|
33 |
self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
|
34 |
|
35 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
36 |
+
runner = InferRunner(device)
|
37 |
event_list = get_event()
|
38 |
def infer(caption, num_steps=200, guidance_scale=3.0, audio_len=16000*10):
|
39 |
with torch.no_grad():
|
|
|
49 |
return output, output
|
50 |
|
51 |
def update_textbox(event_name, current_text):
|
|
|
52 |
event = event_name + ' two times.'
|
53 |
if current_text:
|
54 |
return current_text.strip('.') + ' then ' + event
|
|
|
59 |
with gr.Row():
|
60 |
gr.Markdown("## PicoAudio")
|
61 |
with gr.Row():
|
62 |
+
description_text = f"18 events supported:"
|
63 |
gr.Markdown(description_text)
|
64 |
|
65 |
|
|
|
79 |
|
80 |
|
81 |
with gr.Row():
|
82 |
+
gr.Markdown("## Step1-Preprocess")
|
83 |
with gr.Row():
|
84 |
+
preprocess_description_text = f"Transfer free-text into timestamp caption via LLM. "+\
|
85 |
"This demo uses Gemini as the preprocessor. If any errors occur, please try a few more times. "+\
|
86 |
"We also provide the GPT version consistent with the paper in the file 'Files/llm_reprocessing.py'. You can use your own api_key to modify and run 'Files/inference.py' for local inference."
|
87 |
gr.Markdown(preprocess_description_text)
|
88 |
with gr.Row():
|
89 |
with gr.Column():
|
90 |
+
freetext_prompt = gr.Textbox(label="Free-text Prompt: Input your free-text caption here. (e.g. a dog barks three times.)",
|
91 |
value="a dog barks three times.",)
|
92 |
+
with gr.Row():
|
93 |
+
preprocess_run_button = gr.Button()
|
94 |
+
preprocess_run_clear = gr.ClearButton([freetext_prompt])
|
95 |
prompt = None
|
96 |
with gr.Column():
|
97 |
+
freetext_prompt_out = gr.Textbox(label="Timestamp Caption: Preprocess output")
|
98 |
with gr.Row():
|
99 |
with gr.Column():
|
100 |
gr.Examples(
|
|
|
109 |
|
110 |
|
111 |
with gr.Row():
|
112 |
+
gr.Markdown("## Step2-Generate")
|
113 |
with gr.Row():
|
114 |
generate_description_text = f"Generate audio based on timestamp caption."
|
115 |
gr.Markdown(generate_description_text)
|
116 |
with gr.Row():
|
117 |
with gr.Column():
|
118 |
+
prompt = gr.Textbox(label="Timestamp Caption: Specify your timestamp caption formatted as 'event1 at onset1-offset1_onset2-offset2 and event2 at onset1-offset1'.",
|
119 |
value="spraying at 0.38-1.176_3.06-3.856 and gunshot at 1.729-3.729_4.367-6.367_7.031-9.031.",)
|
120 |
+
with gr.Row():
|
121 |
+
generate_run_button = gr.Button()
|
122 |
+
generate_run_clear = gr.ClearButton([prompt])
|
123 |
with gr.Accordion("Advanced options", open=False):
|
124 |
num_steps = gr.Slider(label="num_steps", minimum=1, maximum=300, value=200, step=1)
|
125 |
guidance_scale = gr.Slider(label="guidance_scale", minimum=0.1, maximum=8.0, value=3.0, step=0.1)
|