Upload 18 files
Browse files

- roop/ProcessMgr.py +223 -27
- roop/ProcessOptions.py +5 -2
- roop/StreamWriter.py +60 -0
- roop/capturer.py +21 -5
- roop/core.py +40 -14
- roop/face_util.py +11 -8
- roop/globals.py +3 -0
- roop/metadata.py +1 -1
- roop/util_ffmpeg.py +22 -2
- roop/utilities.py +39 -0
- roop/virtualcam.py +8 -7
roop/ProcessMgr.py  CHANGED

@@ -3,10 +3,9 @@ import cv2
 import numpy as np
 import psutil
 
-from enum import Enum
 from roop.ProcessOptions import ProcessOptions
 
-from roop.face_util import get_first_face, get_all_faces,
+from roop.face_util import get_first_face, get_all_faces, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
 from roop.utilities import compute_cosine_distance, get_device, str_to_class
 import roop.vr_util as vr
 
@@ -17,15 +16,18 @@ from threading import Thread, Lock
 from queue import Queue
 from tqdm import tqdm
 from roop.ffmpeg_writer import FFMPEG_VideoWriter
+from roop.StreamWriter import StreamWriter
 import roop.globals
 
 
+
 # Poor man's enum to be able to compare to int
 class eNoFaceAction():
     USE_ORIGINAL_FRAME = 0
     RETRY_ROTATED = 1
     SKIP_FRAME = 2
-    SKIP_FRAME_IF_DISSIMILAR = 3
+    SKIP_FRAME_IF_DISSIMILAR = 3,
+    USE_LAST_SWAPPED = 4
 
 
 
@@ -44,6 +46,7 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
     return queues
 
 
+
 class ProcessMgr():
     input_face_datas = []
     target_face_datas = []
@@ -64,11 +67,16 @@ class ProcessMgr():
     processed_queue = None
 
     videowriter= None
+    streamwriter = None
 
     progress_gradio = None
    total_frames = 0
 
-
+    num_frames_no_face = 0
+    last_swapped_frame = None
+
+    output_to_file = None
+    output_to_cam = None
 
 
     plugins = {
@@ -101,6 +109,8 @@ class ProcessMgr():
     def initialize(self, input_faces, target_faces, options):
         self.input_face_datas = input_faces
         self.target_face_datas = target_faces
+        self.num_frames_no_face = 0
+        self.last_swapped_frame = None
         self.options = options
         devicename = get_device()
 
@@ -185,7 +195,8 @@
             resimg = self.process_frame(temp_frame)
             if resimg is not None:
                 i = source_files.index(f)
-
+                # Also let numpy write the file to support utf-8/16 filenames
+                cv2.imencode(f'.{roop.globals.CFG.output_image_format}',resimg)[1].tofile(target_files[i])
             if update:
                 update()
 
@@ -239,7 +250,10 @@
             process, frame = self.processed_queue[nextindex % self.num_threads].get()
             nextindex += 1
             if frame is not None:
-                self.
+                if self.output_to_file:
+                    self.videowriter.write_frame(frame)
+                if self.output_to_cam:
+                    self.streamwriter.WriteToStream(frame)
                 del frame
             elif process == False:
                 num_producers -= 1
@@ -248,7 +262,11 @@
 
 
 
-    def run_batch_inmem(self, source_video, target_video, frame_start, frame_end, fps, threads:int = 1
+    def run_batch_inmem(self, output_method, source_video, target_video, frame_start, frame_end, fps, threads:int = 1):
+        if len(self.processors) < 1:
+            print("No processor defined!")
+            return
+
         cap = cv2.VideoCapture(source_video)
         # frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         frame_count = (frame_end - frame_start) + 1
@@ -275,7 +293,13 @@
             self.frames_queue.append(Queue(1))
             self.processed_queue.append(Queue(1))
 
-        self.
+        self.output_to_file = output_method != "Virtual Camera"
+        self.output_to_cam = output_method == "Virtual Camera" or output_method == "Both"
+
+        if self.output_to_file:
+            self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
+        if self.output_to_cam:
+            self.streamwriter = StreamWriter((width, height), int(fps))
 
         readthread = Thread(target=self.read_frames_thread, args=(cap, frame_start, frame_end, threads))
         readthread.start()
@@ -298,7 +322,11 @@
         readthread.join()
         writethread.join()
         cap.release()
-        self.
+        if self.output_to_file:
+            self.videowriter.close()
+        if self.output_to_cam:
+            self.streamwriter.Close()
+
         self.frames_queue.clear()
         self.processed_queue.clear()
 
@@ -317,11 +345,6 @@
             self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')
 
 
-    # https://github.com/deepinsight/insightface#third-party-re-implementation-of-arcface
-    # https://github.com/deepinsight/insightface/blob/master/alignment/coordinate_reg/image_infer.py
-    # https://github.com/deepinsight/insightface/issues/1350
-    # https://github.com/linghu8812/tensorrt_inference
-
 
     def process_frame(self, frame:Frame):
         if len(self.input_face_datas) < 1 and not self.options.show_face_masking:
@@ -332,8 +355,16 @@
         if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME_IF_DISSIMILAR:
             if len(self.input_face_datas) > num_swapped:
                 return None
+            self.num_frames_no_face = 0
+            self.last_swapped_frame = temp_frame.copy()
             return temp_frame
-        if roop.globals.no_face_action == eNoFaceAction.
+        if roop.globals.no_face_action == eNoFaceAction.USE_LAST_SWAPPED:
+            if self.last_swapped_frame is not None and self.num_frames_no_face < self.options.max_num_reuse_frame:
+                self.num_frames_no_face += 1
+                return self.last_swapped_frame.copy()
+            return frame
+
+        elif roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
             return frame
         if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME:
             #This only works with in-mem processing, as it simply skips the frame.
@@ -374,6 +405,8 @@
 
                 num_faces_found += 1
                 temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
+                del face
+
         else:
             faces = get_all_faces(frame)
             if faces is None:
@@ -383,7 +416,14 @@
                 for face in faces:
                     num_faces_found += 1
                     temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
-
+
+            elif self.options.swap_mode == "all_input":
+                for i,face in enumerate(faces):
+                    num_faces_found += 1
+                    if i < len(self.input_face_datas):
+                        temp_frame = self.process_face(i, face, temp_frame)
+                    else:
+                        break
 
             elif self.options.swap_mode == "selected":
                 num_targetfaces = len(self.target_face_datas)
@@ -397,7 +437,6 @@
                     else:
                         temp_frame = self.process_face(i, face, temp_frame)
                         num_faces_found += 1
-                    del face
                     if not roop.globals.vr_mode and num_faces_found == num_targetfaces:
                         break
             elif self.options.swap_mode == "all_female" or self.options.swap_mode == "all_male":
@@ -406,7 +445,13 @@
                     if face.sex == gender:
                         num_faces_found += 1
                         temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
-
+
+            # might be slower but way more clean to release everything here
+            for face in faces:
+                del face
+            faces.clear()
+
+
 
         if roop.globals.vr_mode and num_faces_found % 2 > 0:
             # stereo image, there has to be an even number of faces
@@ -541,17 +586,31 @@
 
         # img = vr.GetPerspective(frame, 90, theta, phi, 1280, 1280)  # Generate perspective image
 
-
-
+
+        """ Code ported/adapted from Facefusion which borrowed the idea from Rope:
+            Kind of subsampling the cutout and aligned face image and faceswapping slices of it up to
+            the desired output resolution. This works around the current resolution limitations without using enhancers.
+        """
+        model_output_size = 128
+        subsample_size = self.options.subsample_size
+        subsample_total = subsample_size // model_output_size
+        aligned_img, M = align_crop(frame, target_face.kps, subsample_size)
+
         fake_frame = aligned_img
-        swap_frame = aligned_img
         target_face.matrix = M
+
         for p in self.processors:
             if p.type == 'swap':
-
+                swap_result_frames = []
+                subsample_frames = self.implode_pixel_boost(aligned_img, model_output_size, subsample_total)
+                for sliced_frame in subsample_frames:
                     for _ in range(0,self.options.num_swap_steps):
-
-
+                        sliced_frame = self.prepare_crop_frame(sliced_frame)
+                        sliced_frame = p.Run(inputface, target_face, sliced_frame)
+                        sliced_frame = self.normalize_swap_frame(sliced_frame)
+                    swap_result_frames.append(sliced_frame)
+                fake_frame = self.explode_pixel_boost(swap_result_frames, model_output_size, subsample_total, subsample_size)
+                fake_frame = fake_frame.astype(np.uint8)
                 scale_factor = 0.0
             elif p.type == 'mask':
                 fake_frame = self.process_mask(p, aligned_img, fake_frame)
@@ -560,8 +619,8 @@
 
         upscale = 512
         orig_width = fake_frame.shape[1]
-
-
+        if orig_width != upscale:
+            fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
         mask_offsets = (0,0,0,0,1,20) if inputface is None else inputface.mask_offsets
 
 
@@ -571,9 +630,14 @@
         else:
             result = self.paste_upscale(fake_frame, enhanced_frame, target_face.matrix, frame, scale_factor, mask_offsets)
 
+        # Restore mouth before unrotating
+        if self.options.restore_original_mouth:
+            mouth_cutout, mouth_bb = self.create_mouth_mask(target_face, frame)
+            result = self.apply_mouth_area(result, mouth_cutout, mouth_bb)
+
         if rotation_action is not None:
             fake_frame = self.auto_unrotate_frame(result, rotation_action)
-
+            result = self.paste_simple(fake_frame, saved_frame, startX, startY)
 
         return result
 
@@ -673,6 +737,43 @@
         return cv2.GaussianBlur(img_matte, blur_size, 0)
 
 
+    def prepare_crop_frame(self, swap_frame):
+        model_type = 'inswapper'
+        model_mean = [0.0, 0.0, 0.0]
+        model_standard_deviation = [1.0, 1.0, 1.0]
+
+        if model_type == 'ghost':
+            swap_frame = swap_frame[:, :, ::-1] / 127.5 - 1
+        else:
+            swap_frame = swap_frame[:, :, ::-1] / 255.0
+        swap_frame = (swap_frame - model_mean) / model_standard_deviation
+        swap_frame = swap_frame.transpose(2, 0, 1)
+        swap_frame = np.expand_dims(swap_frame, axis = 0).astype(np.float32)
+        return swap_frame
+
+
+    def normalize_swap_frame(self, swap_frame):
+        model_type = 'inswapper'
+        swap_frame = swap_frame.transpose(1, 2, 0)
+
+        if model_type == 'ghost':
+            swap_frame = (swap_frame * 127.5 + 127.5).round()
+        else:
+            swap_frame = (swap_frame * 255.0).round()
+        swap_frame = swap_frame[:, :, ::-1]
+        return swap_frame
+
+    def implode_pixel_boost(self, aligned_face_frame, model_size, pixel_boost_total : int):
+        subsample_frame = aligned_face_frame.reshape(model_size, pixel_boost_total, model_size, pixel_boost_total, 3)
+        subsample_frame = subsample_frame.transpose(1, 3, 0, 2, 4).reshape(pixel_boost_total ** 2, model_size, model_size, 3)
+        return subsample_frame
+
+
+    def explode_pixel_boost(self, subsample_frame, model_size, pixel_boost_total, pixel_boost_size):
+        final_frame = np.stack(subsample_frame, axis = 0).reshape(pixel_boost_total, pixel_boost_total, model_size, model_size, 3)
+        final_frame = final_frame.transpose(2, 0, 3, 1, 4).reshape(pixel_boost_size, pixel_boost_size, 3)
+        return final_frame
+
     def process_mask(self, processor, frame:Frame, target:Frame):
         img_mask = processor.Run(frame, self.options.masking_text)
         img_mask = cv2.resize(img_mask, (target.shape[1], target.shape[0]))
@@ -688,7 +789,98 @@
         result += img_mask * frame.astype(np.float32)
         return np.uint8(result)
 
+
+    # Code for mouth restoration adapted from https://github.com/iVideoGameBoss/iRoopDeepFaceCam
+
+    def create_mouth_mask(self, face: Face, frame: Frame):
+        mouth_cutout = None
+
+        landmarks = face.landmark_2d_106
+        if landmarks is not None:
+            # Get mouth landmarks (indices 52 to 71 typically represent the outer mouth)
+            mouth_points = landmarks[52:71].astype(np.int32)
+
+            # Add padding to mouth area
+            min_x, min_y = np.min(mouth_points, axis=0)
+            max_x, max_y = np.max(mouth_points, axis=0)
+            min_x = max(0, min_x - (15*6))
+            min_y = max(0, min_y - 22)
+            max_x = min(frame.shape[1], max_x + (15*6))
+            max_y = min(frame.shape[0], max_y + (90*6))
+
+            # Extract the mouth area from the frame using the calculated bounding box
+            mouth_cutout = frame[min_y:max_y, min_x:max_x].copy()
+
+        return mouth_cutout, (min_x, min_y, max_x, max_y)
+
+
+
+    def create_feathered_mask(self, shape, feather_amount=30):
+        mask = np.zeros(shape[:2], dtype=np.float32)
+        center = (shape[1] // 2, shape[0] // 2)
+        cv2.ellipse(mask, center, (shape[1] // 2 - feather_amount, shape[0] // 2 - feather_amount),
+                    0, 0, 360, 1, -1)
+        mask = cv2.GaussianBlur(mask, (feather_amount*2+1, feather_amount*2+1), 0)
+        return mask / np.max(mask)
+
+    def apply_mouth_area(self, frame: np.ndarray, mouth_cutout: np.ndarray, mouth_box: tuple) -> np.ndarray:
+        min_x, min_y, max_x, max_y = mouth_box
+        box_width = max_x - min_x
+        box_height = max_y - min_y
+
+
+        # Resize the mouth cutout to match the mouth box size
+        if mouth_cutout is None or box_width is None or box_height is None:
+            return frame
+        try:
+            resized_mouth_cutout = cv2.resize(mouth_cutout, (box_width, box_height))
+
+            # Extract the region of interest (ROI) from the target frame
+            roi = frame[min_y:max_y, min_x:max_x]
+
+            # Ensure the ROI and resized_mouth_cutout have the same shape
+            if roi.shape != resized_mouth_cutout.shape:
+                resized_mouth_cutout = cv2.resize(resized_mouth_cutout, (roi.shape[1], roi.shape[0]))
+
+            # Apply color transfer from ROI to mouth cutout
+            color_corrected_mouth = self.apply_color_transfer(resized_mouth_cutout, roi)
+
+            # Create a feathered mask with increased feather amount
+            feather_amount = min(30, box_width // 15, box_height // 15)
+            mask = self.create_feathered_mask(resized_mouth_cutout.shape, feather_amount)
+
+            # Blend the color-corrected mouth cutout with the ROI using the feathered mask
+            mask = mask[:,:,np.newaxis]  # Add channel dimension to mask
+            blended = (color_corrected_mouth * mask + roi * (1 - mask)).astype(np.uint8)
+
+            # Place the blended result back into the frame
+            frame[min_y:max_y, min_x:max_x] = blended
+        except Exception as e:
+            print(f'Error {e}')
+            pass
+
+        return frame
+
+    def apply_color_transfer(self, source, target):
+        """
+        Apply color transfer from target to source image
+        """
+        source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
+        target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")
+
+        source_mean, source_std = cv2.meanStdDev(source)
+        target_mean, target_std = cv2.meanStdDev(target)
+
+        # Reshape mean and std to be broadcastable
+        source_mean = source_mean.reshape(1, 1, 3)
+        source_std = source_std.reshape(1, 1, 3)
+        target_mean = target_mean.reshape(1, 1, 3)
+        target_std = target_std.reshape(1, 1, 3)
+
+        # Perform the color transfer
+        source = (source - source_mean) * (target_std / source_std) + target_mean
+        return cv2.cvtColor(np.clip(source, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR)
+
 
 
     def unload_models():
@@ -699,4 +891,8 @@
         for p in self.processors:
             p.Release()
         self.processors.clear()
+        if self.videowriter is not None:
+            self.videowriter.close()
+        if self.streamwriter is not None:
+            self.streamwriter.Close()
 
roop/ProcessOptions.py  CHANGED

@@ -1,6 +1,6 @@
 class ProcessOptions:
 
-    def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, show_face_area, show_mask=False):
+    def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, subsample_size, show_face_area, restore_original_mouth, show_mask=False):
         self.processors = processordefines
         self.face_distance_threshold = face_distance
         self.blend_ratio = blend_ratio
@@ -10,4 +10,7 @@ class ProcessOptions:
         self.imagemask = imagemask
         self.num_swap_steps = num_steps
         self.show_face_area_overlay = show_face_area
-        self.show_face_masking = show_mask
+        self.show_face_masking = show_mask
+        self.subsample_size = subsample_size
+        self.restore_original_mouth = restore_original_mouth
+        self.max_num_reuse_frame = 15
roop/StreamWriter.py  ADDED

@@ -0,0 +1,60 @@
+import threading
+import time
+import pyvirtualcam
+
+
+class StreamWriter():
+    FPS = 30
+    VCam = None
+    Active = False
+    THREAD_LOCK_STREAM = threading.Lock()
+    time_last_process = None
+    timespan_min = 0.0
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.Close()
+
+    def __init__(self, size, fps):
+        self.time_last_process = time.perf_counter()
+        self.FPS = fps
+        self.timespan_min = 1.0 / fps
+        print('Detecting virtual cam devices')
+        self.VCam = pyvirtualcam.Camera(width=size[0], height=size[1], fps=fps, fmt=pyvirtualcam.PixelFormat.BGR, print_fps=False)
+        if self.VCam is None:
+            print("No virtual camera found!")
+            return
+        print(f'Using virtual camera: {self.VCam.device}')
+        print(f'Using {self.VCam.native_fmt}')
+        self.Active = True
+
+
+    def LimitFrames(self):
+        while True:
+            current_time = time.perf_counter()
+            time_passed = current_time - self.time_last_process
+            if time_passed >= self.timespan_min:
+                break
+
+    # First version used a queue and threading. Surprisingly this
+    # totally simple, blocking version is 10 times faster!
+    def WriteToStream(self, frame):
+        if self.VCam is None:
+            return
+        with self.THREAD_LOCK_STREAM:
+            self.LimitFrames()
+            self.VCam.send(frame)
+            self.time_last_process = time.perf_counter()
+
+
+    def Close(self):
+        self.Active = False
+        if self.VCam is None:
+            self.VCam.close()
+            self.VCam = None
+
+
+
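For reference, the new writer is meant to be fed one BGR frame at a time, with the context-manager hooks handling cleanup. The sketch below is illustrative only: the 1280x720 size, 30 fps and the OpenCV webcam loop are assumptions, not part of the commit, and a virtual camera backend that pyvirtualcam can find must already be installed.

import cv2
from roop.StreamWriter import StreamWriter

cap = cv2.VideoCapture(0)                       # any BGR frame source works
with StreamWriter((1280, 720), 30) as writer:   # size must match the frames you send
    for _ in range(300):                        # roughly ten seconds at 30 fps
        ok, frame = cap.read()
        if not ok or not writer.Active:
            break
        frame = cv2.resize(frame, (1280, 720))
        writer.WriteToStream(frame)             # blocks just long enough to hold the target FPS
cap.release()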
roop/capturer.py  CHANGED

@@ -4,6 +4,10 @@ import numpy as np
 
 from roop.typing import Frame
 
+current_video_path = None
+current_frame_total = 0
+current_capture = None
+
 def get_image_frame(filename: str):
     try:
         return cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
@@ -13,15 +17,27 @@ def get_image_frame(filename: str):
 
 
 def get_video_frame(video_path: str, frame_number: int = 0) -> Optional[Frame]:
-
-
-
-
-
+    global current_video_path, current_capture, current_frame_total
+
+    if video_path != current_video_path:
+        release_video()
+        current_capture = cv2.VideoCapture(video_path)
+        current_video_path = video_path
+        current_frame_total = current_capture.get(cv2.CAP_PROP_FRAME_COUNT)
+
+    current_capture.set(cv2.CAP_PROP_POS_FRAMES, min(current_frame_total, frame_number - 1))
+    has_frame, frame = current_capture.read()
    if has_frame:
        return frame
    return None
 
+def release_video():
+    global current_capture
+
+    if current_capture is not None:
+        current_capture.release()
+        current_capture = None
+
 
 def get_video_frame_total(video_path: str) -> int:
     capture = cv2.VideoCapture(video_path)
roop/core.py  CHANGED

@@ -14,6 +14,7 @@ import signal
 import torch
 import onnxruntime
 import pathlib
+import argparse
 
 from time import time
 
@@ -27,7 +28,7 @@ from roop.face_util import extract_face_images
 from roop.ProcessEntry import ProcessEntry
 from roop.ProcessMgr import ProcessMgr
 from roop.ProcessOptions import ProcessOptions
-from roop.capturer import get_video_frame_total
+from roop.capturer import get_video_frame_total, release_video
 
 
 clip_text = None
@@ -47,9 +48,12 @@ warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')
 def parse_args() -> None:
     signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())
     roop.globals.headless = False
+
+    program = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=100))
+    program.add_argument('--server_share', help='Public server', dest='server_share', action='store_true', default=False)
+    program.add_argument('--cuda_device_id', help='Index of the cuda gpu to use', dest='cuda_device_id', type=int, default=0)
+    roop.globals.startup_args = program.parse_args()
     # Always enable all processors when using GUI
-    if len(sys.argv) > 1:
-        print('No CLI args supported - use Settings Tab instead')
     roop.globals.frame_processors = ['face_swapper', 'face_enhancer']
 
 
@@ -58,8 +62,20 @@ def encode_execution_providers(execution_providers: List[str]) -> List[str]:
 
 
 def decode_execution_providers(execution_providers: List[str]) -> List[str]:
-
+    list_providers = [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
                       if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers)]
+
+    try:
+        for i in range(len(list_providers)):
+            if list_providers[i] == 'CUDAExecutionProvider':
+                list_providers[i] = ('CUDAExecutionProvider', {'device_id': roop.globals.cuda_device_id})
+                torch.cuda.set_device(roop.globals.cuda_device_id)
+                break
+    except:
+        pass
+
+    return list_providers
+
 
 
 def suggest_max_memory() -> int:
@@ -204,7 +220,7 @@ def live_swap(frame, options):
     return newframe
 
 
-def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, num_swap_steps, progress, selected_index = 0) -> None:
+def batch_process_regular(output_method, files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, restore_original_mouth, num_swap_steps, progress, selected_index = 0) -> None:
     global clip_text, process_mgr
 
     release_resources()
@@ -214,9 +230,11 @@ def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip
     mask = imagemask["layers"][0] if imagemask is not None else None
     if len(roop.globals.INPUT_FACESETS) <= selected_index:
         selected_index = 0
-    options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
+    options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
+                             roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps,
+                             roop.globals.subsample_size, False, restore_original_mouth)
     process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
-    batch_process(files, use_new_method)
+    batch_process(output_method, files, use_new_method)
     return
 
 def batch_process_with_options(files:list[ProcessEntry], options, progress):
@@ -230,11 +248,11 @@ def batch_process_with_options(files:list[ProcessEntry], options, progress):
     roop.globals.keep_frames = False
     roop.globals.wait_after_extraction = False
     roop.globals.skip_audio = False
-    batch_process(files, True)
+    batch_process("Files", files, True)
 
 
 
-def batch_process(files:list[ProcessEntry], use_new_method) -> None:
+def batch_process(output_method, files:list[ProcessEntry], use_new_method) -> None:
     global clip_text, process_mgr
 
     roop.globals.processing = True
@@ -287,9 +305,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
             if v.endframe == 0:
                 v.endframe = get_video_frame_total(v.filename)
 
-
+            is_streaming_only = output_method == "Virtual Camera"
+            if is_streaming_only == False:
+                update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
+
             start_processing = time()
-            if roop.globals.keep_frames or not use_new_method:
+            if is_streaming_only == False and roop.globals.keep_frames or not use_new_method:
                 util.create_temp(v.filename)
                 update_status('Extracting frames...')
                 ffmpeg.extract_frames(v.filename,v.startframe,v.endframe, fps)
@@ -317,7 +338,7 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
                     skip_audio = True
                 else:
                     skip_audio = roop.globals.skip_audio
-                process_mgr.run_batch_inmem(v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads
+                process_mgr.run_batch_inmem(output_method, v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads)
 
                 if not roop.globals.processing:
                     end_processing('Processing stopped!')
@@ -346,10 +367,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
                     os.remove(video_file_name)
                 else:
                     shutil.move(video_file_name, destination)
-                update_status(f'\nProcessing {os.path.basename(destination)} took {time() - start_processing} secs')
 
-
+            elif is_streaming_only == False:
                 update_status(f'Failed processing {os.path.basename(v.finalname)}!')
+            elapsed_time = time() - start_processing
+            average_fps = (v.endframe - v.startframe) / elapsed_time
+            update_status(f'\nProcessing {os.path.basename(destination)} took {elapsed_time:.2f} secs, {average_fps:.2f} frames/s')
     end_processing('Finished')
 
 
@@ -371,8 +394,11 @@ def run() -> None:
     if not pre_check():
         return
     roop.globals.CFG = Settings('config.yaml')
+    roop.globals.cuda_device_id = roop.globals.startup_args.cuda_device_id
     roop.globals.execution_threads = roop.globals.CFG.max_threads
     roop.globals.video_encoder = roop.globals.CFG.output_video_codec
     roop.globals.video_quality = roop.globals.CFG.video_quality
     roop.globals.max_memory = roop.globals.CFG.memory_limit if roop.globals.CFG.memory_limit > 0 else None
+    if roop.globals.startup_args.server_share:
+        roop.globals.CFG.server_share = True
     main.run()
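With these changes the launcher accepts real command-line flags instead of rejecting them: --cuda_device_id picks which GPU the CUDAExecutionProvider and torch bind to, and --server_share enables the public server option in the settings. Assuming the usual run.py entry point (not shown in this diff), a launch might look like `python run.py --cuda_device_id 1 --server_share`.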
roop/face_util.py  CHANGED

@@ -9,18 +9,18 @@ import cv2
 import numpy as np
 from skimage import transform as trans
 from roop.capturer import get_video_frame
-from roop.utilities import resolve_relative_path,
+from roop.utilities import resolve_relative_path, conditional_thread_semaphore
 
 FACE_ANALYSER = None
-THREAD_LOCK_ANALYSER = threading.Lock()
-THREAD_LOCK_SWAPPER = threading.Lock()
+#THREAD_LOCK_ANALYSER = threading.Lock()
+#THREAD_LOCK_SWAPPER = threading.Lock()
 FACE_SWAPPER = None
 
 
 def get_face_analyser() -> Any:
     global FACE_ANALYSER
 
-    with
+    with conditional_thread_semaphore():
         if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != roop.globals.g_desired_face_analysis:
             model_path = resolve_relative_path('..')
             # removed genderage
@@ -210,15 +210,18 @@ arcface_dst = np.array(
 )
 
 
-def estimate_norm(lmk, image_size=112, mode="arcface"):
+def estimate_norm(lmk, image_size=112):
     assert lmk.shape == (5, 2)
-    assert image_size % 112 == 0 or image_size % 128 == 0
     if image_size % 112 == 0:
         ratio = float(image_size) / 112.0
         diff_x = 0
-
+    elif image_size % 128 == 0:
         ratio = float(image_size) / 128.0
         diff_x = 8.0 * ratio
+    elif image_size % 512 == 0:
+        ratio = float(image_size) / 512.0
+        diff_x = 32.0 * ratio
+
     dst = arcface_dst * ratio
     dst[:, 0] += diff_x
     tform = trans.SimilarityTransform()
@@ -230,7 +233,7 @@ def estimate_norm(lmk, image_size=112, mode="arcface"):
 
 # aligned, M = norm_crop2(f[1], face.kps, 512)
 def align_crop(img, landmark, image_size=112, mode="arcface"):
-    M = estimate_norm(landmark, image_size
+    M = estimate_norm(landmark, image_size)
     warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
     return warped, M
 
roop/globals.py  CHANGED

@@ -5,7 +5,9 @@ source_path = None
 target_path = None
 output_path = None
 target_folder_path = None
+startup_args = None
 
+cuda_device_id = 0
 frame_processors: List[str] = []
 keep_fps = None
 keep_frames = None
@@ -26,6 +28,7 @@ execution_threads = None
 headless = None
 log_level = 'error'
 selected_enhancer = None
+subsample_size = 128
 face_swap_mode = None
 blend_ratio = 0.5
 distance_threshold = 0.65
roop/metadata.py  CHANGED

@@ -1,2 +1,2 @@
 name = 'roop unleashed'
-version = '4.
+version = '4.3.3'
roop/util_ffmpeg.py  CHANGED

@@ -73,12 +73,32 @@ def create_video(target_path: str, dest_filename: str, fps: float = 24.0, temp_d
 
 
 def create_gif_from_video(video_path: str, gif_path):
-    from roop.capturer import get_video_frame
+    from roop.capturer import get_video_frame, release_video
 
     fps = util.detect_fps(video_path)
     frame = get_video_frame(video_path)
+    release_video()
 
-
+    scalex = frame.shape[0]
+    scaley = frame.shape[1]
+
+    if scalex >= scaley:
+        scaley = -1
+    else:
+        scalex = -1
+
+    run_ffmpeg(['-i', video_path, '-vf', f'fps={fps},scale={int(scalex)}:{int(scaley)}:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse', '-loop', '0', gif_path])
+
+
+
+def create_video_from_gif(gif_path: str, output_path):
+    fps = util.detect_fps(gif_path)
+    filter = """scale='trunc(in_w/2)*2':'trunc(in_h/2)*2',format=yuv420p,fps=10"""
+    run_ffmpeg(['-i', gif_path, '-vf', f'"{filter}"', '-movflags', '+faststart', '-shortest', output_path])
+
+
+def repair_video(original_video: str, final_video : str):
+    run_ffmpeg(['-i', original_video, '-movflags', 'faststart', '-acodec', 'copy', '-vcodec', 'copy', final_video])
 
 
 def restore_audio(intermediate_video: str, original_video: str, trim_frame_start, trim_frame_end, final_video : str) -> None:
roop/utilities.py  CHANGED

@@ -13,6 +13,11 @@ import tempfile
 import cv2
 import zipfile
 import traceback
+import threading
+import threading
+
+from typing import Union, Any
+from contextlib import nullcontext
 
 from pathlib import Path
 from typing import List, Any
@@ -26,6 +31,10 @@ import roop.globals
 TEMP_FILE = "temp.mp4"
 TEMP_DIRECTORY = "temp"
 
+THREAD_SEMAPHORE = threading.Semaphore()
+NULL_CONTEXT = nullcontext()
+
+
 # monkey patch ssl for mac
 if platform.system().lower() == "darwin":
     ssl._create_default_https_context = ssl._create_unverified_context
@@ -173,6 +182,8 @@ def has_extension(filepath: str, extensions: List[str]) -> bool:
 
 def is_image(image_path: str) -> bool:
     if image_path and os.path.isfile(image_path):
+        if image_path.endswith(".webp"):
+            return True
         mimetype, _ = mimetypes.guess_type(image_path)
         return bool(mimetype and mimetype.startswith("image/"))
     return False
@@ -337,3 +348,31 @@ gradio: {gradio.__version__}
 
 def compute_cosine_distance(emb1, emb2) -> float:
     return distance.cosine(emb1, emb2)
+
+def has_cuda_device():
+    return torch.cuda is not None and torch.cuda.is_available()
+
+
+def print_cuda_info():
+    try:
+        print(f'Number of CUDA devices: {torch.cuda.device_count()} Currently used Id: {torch.cuda.current_device()} Device Name: {torch.cuda.get_device_name(torch.cuda.current_device())}')
+    except:
+        print('No CUDA device found!')
+
+def clean_dir(path: str):
+    contents = os.listdir(path)
+    for item in contents:
+        item_path = os.path.join(path, item)
+        try:
+            if os.path.isfile(item_path):
+                os.remove(item_path)
+            elif os.path.isdir(item_path):
+                shutil.rmtree(item_path)
+        except Exception as e:
+            print(e)
+
+
+def conditional_thread_semaphore() -> Union[Any, Any]:
+    if 'DmlExecutionProvider' in roop.globals.execution_providers or 'ROCMExecutionProvider' in roop.globals.execution_providers:
+        return THREAD_SEMAPHORE
+    return NULL_CONTEXT
roop/virtualcam.py  CHANGED

@@ -10,7 +10,7 @@ cam_active = False
 cam_thread = None
 vcam = None
 
-def virtualcamera(streamobs, cam_num,width,height):
+def virtualcamera(streamobs, use_xseg, use_mouthrestore, cam_num,width,height):
     from roop.ProcessOptions import ProcessOptions
     from roop.core import live_swap, get_processing_plugins
 
@@ -44,10 +44,11 @@ def virtualcamera(streamobs, cam_num,width,height):
         print(f'Using {cam.native_fmt}')
     else:
         print(f'Not streaming to virtual camera!')
+    subsample_size = roop.globals.subsample_size
 
-
-    options = ProcessOptions(get_processing_plugins("mask_xseg"), roop.globals.distance_threshold, roop.globals.blend_ratio,
-                             "all", 0, None, None, 1, False)
+
+    options = ProcessOptions(get_processing_plugins("mask_xseg" if use_xseg else None), roop.globals.distance_threshold, roop.globals.blend_ratio,
+                             "all", 0, None, None, 1, subsample_size, False, use_mouthrestore)
     while cam_active:
         ret, frame = cap.read()
         if not ret:
@@ -67,12 +68,12 @@
 
 
 
-def start_virtual_cam(streamobs, cam_number, resolution):
+def start_virtual_cam(streamobs, use_xseg, use_mouthrestore, cam_number, resolution):
     global cam_thread, cam_active
 
     if not cam_active:
         width, height = map(int, resolution.split('x'))
-        cam_thread = threading.Thread(target=virtualcamera, args=[streamobs, cam_number, width, height])
+        cam_thread = threading.Thread(target=virtualcamera, args=[streamobs, use_xseg, use_mouthrestore, cam_number, width, height])
        cam_thread.start()
 
 
@@ -83,5 +84,5 @@ def stop_virtual_cam():
     if cam_active:
         cam_active = False
         cam_thread.join()
-
+
 