John6666 committed on
Commit
49c377b
β€’
1 Parent(s): 393bada

Upload 2 files

Browse files
Files changed (2) hide show
  1. README.md +13 -13
  2. app.py +106 -109
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: SadTalker (Gradio 4.x)
3
- emoji: 🐒
4
- colorFrom: pink
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 4.42.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: SadTalker GitHub (Gradio 4.x, latest PyTorch)
3
+ emoji: 🐒
4
+ colorFrom: pink
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,109 +1,106 @@
1
- import spaces
2
- import os, sys
3
- import gradio as gr
4
- from src.gradio_demo import SadTalker
5
-
6
- try:
7
- import webui # in webui
8
- in_webui = True
9
- except:
10
- in_webui = False
11
-
12
-
13
- def toggle_audio_file(choice):
14
- if choice == False:
15
- return gr.update(visible=True), gr.update(visible=False)
16
- else:
17
- return gr.update(visible=False), gr.update(visible=True)
18
-
19
- def ref_video_fn(path_of_ref_video):
20
- if path_of_ref_video is not None:
21
- return gr.update(value=True)
22
- else:
23
- return gr.update(value=False)
24
-
25
- def sadtalker_demo(checkpoint_path='checkpoints', config_path='src/config', warpfn=None):
26
-
27
- sad_talker = SadTalker(checkpoint_path, config_path, lazy_load=True)
28
-
29
- with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
30
- gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
31
- <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
32
- <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
33
- <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
34
-
35
- with gr.Row():
36
- with gr.Column(variant='panel'):
37
- with gr.Tabs(elem_id="sadtalker_source_image"):
38
- with gr.TabItem('Upload image'):
39
- with gr.Row():
40
- source_image = gr.Image(label="Source image", type="filepath", elem_id="img2img_image")
41
-
42
- with gr.Tabs(elem_id="sadtalker_driven_audio"):
43
- with gr.TabItem('Upload OR TTS'):
44
- with gr.Column(variant='panel'):
45
- driven_audio = gr.Audio(label="Input audio", type="filepath")
46
-
47
- if sys.platform != 'win32' and not in_webui:
48
- from src.utils.text2speech import TTSTalker
49
- tts_talker = TTSTalker()
50
- with gr.Column(variant='panel'):
51
- input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
52
- tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
53
- tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
54
-
55
- with gr.Column(variant='panel'):
56
- with gr.Tabs(elem_id="sadtalker_checkbox"):
57
- with gr.TabItem('Settings'):
58
- gr.Markdown("need help? please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more detials")
59
- with gr.Column(variant='panel'):
60
- # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
61
- # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
62
- pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
63
- size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
64
- preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
65
- is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
66
- batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
67
- enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
68
- submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
69
-
70
- with gr.Tabs(elem_id="sadtalker_genearted"):
71
- gen_video = gr.Video(label="Generated video", format="mp4")
72
-
73
- if warpfn:
74
- submit.click(
75
- fn=warpfn(sad_talker.test),
76
- inputs=[source_image,
77
- driven_audio,
78
- preprocess_type,
79
- is_still_mode,
80
- enhancer,
81
- batch_size,
82
- size_of_image,
83
- pose_style
84
- ],
85
- outputs=[gen_video]
86
- )
87
- else:
88
- submit.click(
89
- fn=sad_talker.test,
90
- inputs=[source_image,
91
- driven_audio,
92
- preprocess_type,
93
- is_still_mode,
94
- enhancer,
95
- batch_size,
96
- size_of_image,
97
- pose_style
98
- ],
99
- outputs=[gen_video]
100
- )
101
-
102
- return sadtalker_interface
103
-
104
-
105
- if __name__ == "__main__":
106
-
107
- demo = sadtalker_demo()
108
- demo.queue()
109
- demo.launch()
 
1
+ import spaces
2
+ import os, sys
3
+ import gradio as gr
4
+ from src.gradio_demo import SadTalker
5
+
6
# Detect whether we are running embedded inside stable-diffusion-webui:
# the `webui` module is only importable in that environment.
try:
    import webui  # in webui
    in_webui = True
except ImportError:
    # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
    # and hid unrelated import-time failures; ImportError is the intended case.
    in_webui = False
11
+
12
+
13
def toggle_audio_file(choice):
    """Toggle visibility between the audio-upload widget and the TTS textbox.

    Parameters:
        choice: checkbox state; falsy shows the upload widget and hides the
            TTS input, truthy does the reverse.

    Returns:
        A pair of gr.update payloads: (upload_widget_update, tts_widget_update).
    """
    # Original compared `choice == False` (PEP 8 E712); the input is a checkbox
    # bool, so plain truthiness is equivalent and idiomatic, and also treats a
    # missing/None value as "show the upload widget".
    if not choice:
        return gr.update(visible=True), gr.update(visible=False)
    return gr.update(visible=False), gr.update(visible=True)
18
+
19
def ref_video_fn(path_of_ref_video):
    """Return a gr.update that checks a box iff a reference video was supplied.

    Parameters:
        path_of_ref_video: filepath of the uploaded reference video, or None.

    Returns:
        gr.update with value=True when a path is present, else value=False.
    """
    # The original if/else returned gr.update(value=True/False); the condition
    # itself is that boolean, so return it directly.
    return gr.update(value=path_of_ref_video is not None)
24
+
25
def sadtalker_demo(checkpoint_path='checkpoints', config_path='src/config', warpfn=None):
    """Build and return the SadTalker Gradio Blocks interface.

    Parameters:
        checkpoint_path: directory holding the pretrained SadTalker weights.
        config_path: directory holding the model config files.
        warpfn: optional wrapper applied to the inference callable (e.g. a
            GPU/queue decorator) before it is bound to the Generate button.

    Returns:
        The assembled (not yet launched) gr.Blocks interface.
    """

    # lazy_load=True defers model/weight loading until first use.
    sad_talker = SadTalker(checkpoint_path, config_path, lazy_load=True)

    with gr.Blocks(analytics_enabled=False, theme="Hev832/Applio") as sadtalker_interface:
        # Page header linking the paper, project page and repository.
        gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
                    <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
                    <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
                    <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")

        # NOTE(review): the nesting below was reconstructed from a flattened
        # diff view (indentation was lost) — confirm the Tabs/TabItem grouping
        # against the deployed Space before relying on the exact layout.
        with gr.Row():
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_source_image"):
                    # Tab 1: the single source face image to animate.
                    with gr.TabItem('Upload image'):
                        with gr.Row():
                            source_image = gr.Image(label="Source image", type="filepath", elem_id="img2img_image")

                    # Tab 2: driving audio — uploaded, or synthesized from text.
                    with gr.TabItem('Upload OR TTS', elem_id="sadtalker_driven_audio"):
                        with gr.Column(variant='panel'):
                            driven_audio = gr.Audio(label="Input audio", type="filepath")

                        # TTS widgets are only built where TTSTalker is usable:
                        # not on Windows and not inside sd-webui.
                        if sys.platform != 'win32' and not in_webui:
                            from src.utils.text2speech import TTSTalker
                            tts_talker = TTSTalker()
                            with gr.Column(variant='panel'):
                                input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="please enter some text here, we genreate the audio from text using @Coqui.ai TTS.")
                                tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
                                # Synthesized speech is written back into the audio input.
                                tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])

                    # Tab 3: generation settings.
                    with gr.TabItem('Settings', elem_id="sadtalker_checkbox"):
                        gr.Markdown("need help? please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more detials")
                        with gr.Column(variant='panel'):
                            # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
                            # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
                            pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
                            size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
                            preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
                            is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
                            batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
                            enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')

                # Output panel: the rendered talking-head video.
                with gr.Tabs(elem_id="sadtalker_genearted"):
                    gen_video = gr.Video(label="Generated video", format="mp4")

        # Bind the Generate button; warpfn (when provided) wraps the inference
        # callable, otherwise sad_talker.test is used directly.
        if warpfn:
            submit.click(
                fn=warpfn(sad_talker.test),
                inputs=[source_image,
                        driven_audio,
                        preprocess_type,
                        is_still_mode,
                        enhancer,
                        batch_size,
                        size_of_image,
                        pose_style
                        ],
                outputs=[gen_video]
                )
        else:
            submit.click(
                fn=sad_talker.test,
                inputs=[source_image,
                        driven_audio,
                        preprocess_type,
                        is_still_mode,
                        enhancer,
                        batch_size,
                        size_of_image,
                        pose_style
                        ],
                outputs=[gen_video]
                )

    return sadtalker_interface
100
+
101
+
102
# Script entry point: build the UI, enable request queueing, and serve it.
if __name__ == "__main__":
    interface = sadtalker_demo()
    interface.queue()
    interface.launch()