Ggh596 committed
Commit 5494b59 · verified · 1 Parent(s): 752438f

Upload 18 files

roop/ProcessMgr.py CHANGED
@@ -3,10 +3,9 @@ import cv2
 import numpy as np
 import psutil
 
-from enum import Enum
 from roop.ProcessOptions import ProcessOptions
 
-from roop.face_util import get_first_face, get_all_faces, rotate_image_180, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
+from roop.face_util import get_first_face, get_all_faces, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
 from roop.utilities import compute_cosine_distance, get_device, str_to_class
 import roop.vr_util as vr
 
@@ -17,15 +16,18 @@ from threading import Thread, Lock
 from queue import Queue
 from tqdm import tqdm
 from roop.ffmpeg_writer import FFMPEG_VideoWriter
+from roop.StreamWriter import StreamWriter
 import roop.globals
 
 
+
 # Poor man's enum to be able to compare to int
 class eNoFaceAction():
     USE_ORIGINAL_FRAME = 0
     RETRY_ROTATED = 1
     SKIP_FRAME = 2
     SKIP_FRAME_IF_DISSIMILAR = 3
+    USE_LAST_SWAPPED = 4
 
 
@@ -44,6 +46,7 @@ def pick_queue(queue: Queue[str], queue_per_future: int) -> List[str]:
     return queues
 
 
+
 class ProcessMgr():
     input_face_datas = []
     target_face_datas = []
@@ -64,11 +67,16 @@ class ProcessMgr():
     processed_queue = None
 
     videowriter= None
+    streamwriter = None
 
     progress_gradio = None
     total_frames = 0
 
-
+    num_frames_no_face = 0
+    last_swapped_frame = None
+
+    output_to_file = None
+    output_to_cam = None
 
 
     plugins = {
@@ -101,6 +109,8 @@ class ProcessMgr():
     def initialize(self, input_faces, target_faces, options):
         self.input_face_datas = input_faces
         self.target_face_datas = target_faces
+        self.num_frames_no_face = 0
+        self.last_swapped_frame = None
         self.options = options
         devicename = get_device()
 
@@ -185,7 +195,8 @@ class ProcessMgr():
             resimg = self.process_frame(temp_frame)
             if resimg is not None:
                 i = source_files.index(f)
-                cv2.imwrite(target_files[i], resimg)
+                # Also let numpy write the file to support utf-8/16 filenames
+                cv2.imencode(f'.{roop.globals.CFG.output_image_format}',resimg)[1].tofile(target_files[i])
             if update:
                 update()
 
@@ -239,7 +250,10 @@ class ProcessMgr():
             process, frame = self.processed_queue[nextindex % self.num_threads].get()
             nextindex += 1
             if frame is not None:
-                self.videowriter.write_frame(frame)
+                if self.output_to_file:
+                    self.videowriter.write_frame(frame)
+                if self.output_to_cam:
+                    self.streamwriter.WriteToStream(frame)
                 del frame
             elif process == False:
                 num_producers -= 1
@@ -248,7 +262,11 @@ class ProcessMgr():
 
 
 
-    def run_batch_inmem(self, source_video, target_video, frame_start, frame_end, fps, threads:int = 1, skip_audio=False):
+    def run_batch_inmem(self, output_method, source_video, target_video, frame_start, frame_end, fps, threads:int = 1):
+        if len(self.processors) < 1:
+            print("No processor defined!")
+            return
+
         cap = cv2.VideoCapture(source_video)
         # frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         frame_count = (frame_end - frame_start) + 1
@@ -275,7 +293,13 @@ class ProcessMgr():
             self.frames_queue.append(Queue(1))
             self.processed_queue.append(Queue(1))
 
-        self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
+        self.output_to_file = output_method != "Virtual Camera"
+        self.output_to_cam = output_method == "Virtual Camera" or output_method == "Both"
+
+        if self.output_to_file:
+            self.videowriter = FFMPEG_VideoWriter(target_video, (width, height), fps, codec=roop.globals.video_encoder, crf=roop.globals.video_quality, audiofile=None)
+        if self.output_to_cam:
+            self.streamwriter = StreamWriter((width, height), int(fps))
 
         readthread = Thread(target=self.read_frames_thread, args=(cap, frame_start, frame_end, threads))
         readthread.start()
@@ -298,7 +322,11 @@ class ProcessMgr():
         readthread.join()
         writethread.join()
         cap.release()
-        self.videowriter.close()
+        if self.output_to_file:
+            self.videowriter.close()
+        if self.output_to_cam:
+            self.streamwriter.Close()
+
         self.frames_queue.clear()
         self.processed_queue.clear()
 
@@ -317,11 +345,6 @@ class ProcessMgr():
             self.progress_gradio((progress.n, self.total_frames), desc='Processing', total=self.total_frames, unit='frames')
 
 
-    # https://github.com/deepinsight/insightface#third-party-re-implementation-of-arcface
-    # https://github.com/deepinsight/insightface/blob/master/alignment/coordinate_reg/image_infer.py
-    # https://github.com/deepinsight/insightface/issues/1350
-    # https://github.com/linghu8812/tensorrt_inference
-
 
     def process_frame(self, frame:Frame):
         if len(self.input_face_datas) < 1 and not self.options.show_face_masking:
@@ -332,8 +355,16 @@ class ProcessMgr():
         if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME_IF_DISSIMILAR:
             if len(self.input_face_datas) > num_swapped:
                 return None
+            self.num_frames_no_face = 0
+            self.last_swapped_frame = temp_frame.copy()
             return temp_frame
-        if roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
+        if roop.globals.no_face_action == eNoFaceAction.USE_LAST_SWAPPED:
+            if self.last_swapped_frame is not None and self.num_frames_no_face < self.options.max_num_reuse_frame:
+                self.num_frames_no_face += 1
+                return self.last_swapped_frame.copy()
+            return frame
+
+        elif roop.globals.no_face_action == eNoFaceAction.USE_ORIGINAL_FRAME:
             return frame
         if roop.globals.no_face_action == eNoFaceAction.SKIP_FRAME:
             #This only works with in-mem processing, as it simply skips the frame.
@@ -374,6 +405,8 @@ class ProcessMgr():
 
                 num_faces_found += 1
                 temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
+                del face
+
             else:
                 faces = get_all_faces(frame)
                 if faces is None:
@@ -383,7 +416,14 @@ class ProcessMgr():
                 for face in faces:
                     num_faces_found += 1
                     temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
-                    del face
+
+            elif self.options.swap_mode == "all_input":
+                for i,face in enumerate(faces):
+                    num_faces_found += 1
+                    if i < len(self.input_face_datas):
+                        temp_frame = self.process_face(i, face, temp_frame)
+                    else:
+                        break
 
             elif self.options.swap_mode == "selected":
                 num_targetfaces = len(self.target_face_datas)
@@ -397,7 +437,6 @@ class ProcessMgr():
                     else:
                         temp_frame = self.process_face(i, face, temp_frame)
                         num_faces_found += 1
-                    del face
                 if not roop.globals.vr_mode and num_faces_found == num_targetfaces:
                     break
             elif self.options.swap_mode == "all_female" or self.options.swap_mode == "all_male":
@@ -406,7 +445,13 @@ class ProcessMgr():
                     if face.sex == gender:
                         num_faces_found += 1
                         temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
-                    del face
+
+            # might be slower but way more clean to release everything here
+            for face in faces:
+                del face
+            faces.clear()
+
+
 
         if roop.globals.vr_mode and num_faces_found % 2 > 0:
             # stereo image, there has to be an even number of faces
@@ -541,17 +586,31 @@ class ProcessMgr():
 
         # img = vr.GetPerspective(frame, 90, theta, phi, 1280, 1280) # Generate perspective image
 
-        fake_frame = None
-        aligned_img, M = align_crop(frame, target_face.kps, 128)
+
+        """ Code ported/adapted from Facefusion which borrowed the idea from Rope:
+            Kind of subsampling the cutout and aligned face image and faceswapping slices of it up to
+            the desired output resolution. This works around the current resolution limitations without using enhancers.
+        """
+        model_output_size = 128
+        subsample_size = self.options.subsample_size
+        subsample_total = subsample_size // model_output_size
+        aligned_img, M = align_crop(frame, target_face.kps, subsample_size)
+
         fake_frame = aligned_img
-        swap_frame = aligned_img
         target_face.matrix = M
+
         for p in self.processors:
             if p.type == 'swap':
-                if inputface is not None:
+                swap_result_frames = []
+                subsample_frames = self.implode_pixel_boost(aligned_img, model_output_size, subsample_total)
+                for sliced_frame in subsample_frames:
                     for _ in range(0,self.options.num_swap_steps):
-                        swap_frame = p.Run(inputface, target_face, swap_frame)
-                    fake_frame = swap_frame
+                        sliced_frame = self.prepare_crop_frame(sliced_frame)
+                        sliced_frame = p.Run(inputface, target_face, sliced_frame)
+                        sliced_frame = self.normalize_swap_frame(sliced_frame)
+                    swap_result_frames.append(sliced_frame)
+                fake_frame = self.explode_pixel_boost(swap_result_frames, model_output_size, subsample_total, subsample_size)
+                fake_frame = fake_frame.astype(np.uint8)
                 scale_factor = 0.0
             elif p.type == 'mask':
                 fake_frame = self.process_mask(p, aligned_img, fake_frame)
@@ -560,8 +619,8 @@ class ProcessMgr():
 
         upscale = 512
         orig_width = fake_frame.shape[1]
-
-        fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
+        if orig_width != upscale:
+            fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
         mask_offsets = (0,0,0,0,1,20) if inputface is None else inputface.mask_offsets
 
 
@@ -571,9 +630,14 @@ class ProcessMgr():
         else:
            result = self.paste_upscale(fake_frame, enhanced_frame, target_face.matrix, frame, scale_factor, mask_offsets)
 
+        # Restore mouth before unrotating
+        if self.options.restore_original_mouth:
+            mouth_cutout, mouth_bb = self.create_mouth_mask(target_face, frame)
+            result = self.apply_mouth_area(result, mouth_cutout, mouth_bb)
+
        if rotation_action is not None:
            fake_frame = self.auto_unrotate_frame(result, rotation_action)
-           return self.paste_simple(fake_frame, saved_frame, startX, startY)
+           result = self.paste_simple(fake_frame, saved_frame, startX, startY)
 
        return result
 
@@ -673,6 +737,43 @@ class ProcessMgr():
         return cv2.GaussianBlur(img_matte, blur_size, 0)
 
 
+    def prepare_crop_frame(self, swap_frame):
+        model_type = 'inswapper'
+        model_mean = [0.0, 0.0, 0.0]
+        model_standard_deviation = [1.0, 1.0, 1.0]
+
+        if model_type == 'ghost':
+            swap_frame = swap_frame[:, :, ::-1] / 127.5 - 1
+        else:
+            swap_frame = swap_frame[:, :, ::-1] / 255.0
+        swap_frame = (swap_frame - model_mean) / model_standard_deviation
+        swap_frame = swap_frame.transpose(2, 0, 1)
+        swap_frame = np.expand_dims(swap_frame, axis = 0).astype(np.float32)
+        return swap_frame
+
+
+    def normalize_swap_frame(self, swap_frame):
+        model_type = 'inswapper'
+        swap_frame = swap_frame.transpose(1, 2, 0)
+
+        if model_type == 'ghost':
+            swap_frame = (swap_frame * 127.5 + 127.5).round()
+        else:
+            swap_frame = (swap_frame * 255.0).round()
+        swap_frame = swap_frame[:, :, ::-1]
+        return swap_frame
+
+    def implode_pixel_boost(self, aligned_face_frame, model_size, pixel_boost_total : int):
+        subsample_frame = aligned_face_frame.reshape(model_size, pixel_boost_total, model_size, pixel_boost_total, 3)
+        subsample_frame = subsample_frame.transpose(1, 3, 0, 2, 4).reshape(pixel_boost_total ** 2, model_size, model_size, 3)
+        return subsample_frame
+
+
+    def explode_pixel_boost(self, subsample_frame, model_size, pixel_boost_total, pixel_boost_size):
+        final_frame = np.stack(subsample_frame, axis = 0).reshape(pixel_boost_total, pixel_boost_total, model_size, model_size, 3)
+        final_frame = final_frame.transpose(2, 0, 3, 1, 4).reshape(pixel_boost_size, pixel_boost_size, 3)
+        return final_frame
+
     def process_mask(self, processor, frame:Frame, target:Frame):
         img_mask = processor.Run(frame, self.options.masking_text)
         img_mask = cv2.resize(img_mask, (target.shape[1], target.shape[0]))
@@ -688,7 +789,98 @@ class ProcessMgr():
         result += img_mask * frame.astype(np.float32)
         return np.uint8(result)
 
+
+    # Code for mouth restoration adapted from https://github.com/iVideoGameBoss/iRoopDeepFaceCam
+
+    def create_mouth_mask(self, face: Face, frame: Frame):
+        mouth_cutout = None
+
+        landmarks = face.landmark_2d_106
+        if landmarks is not None:
+            # Get mouth landmarks (indices 52 to 71 typically represent the outer mouth)
+            mouth_points = landmarks[52:71].astype(np.int32)
+
+            # Add padding to mouth area
+            min_x, min_y = np.min(mouth_points, axis=0)
+            max_x, max_y = np.max(mouth_points, axis=0)
+            min_x = max(0, min_x - (15*6))
+            min_y = max(0, min_y - 22)
+            max_x = min(frame.shape[1], max_x + (15*6))
+            max_y = min(frame.shape[0], max_y + (90*6))
+
+            # Extract the mouth area from the frame using the calculated bounding box
+            mouth_cutout = frame[min_y:max_y, min_x:max_x].copy()
+
+        return mouth_cutout, (min_x, min_y, max_x, max_y)
+
+
+
+    def create_feathered_mask(self, shape, feather_amount=30):
+        mask = np.zeros(shape[:2], dtype=np.float32)
+        center = (shape[1] // 2, shape[0] // 2)
+        cv2.ellipse(mask, center, (shape[1] // 2 - feather_amount, shape[0] // 2 - feather_amount),
+                    0, 0, 360, 1, -1)
+        mask = cv2.GaussianBlur(mask, (feather_amount*2+1, feather_amount*2+1), 0)
+        return mask / np.max(mask)
+
+    def apply_mouth_area(self, frame: np.ndarray, mouth_cutout: np.ndarray, mouth_box: tuple) -> np.ndarray:
+        min_x, min_y, max_x, max_y = mouth_box
+        box_width = max_x - min_x
+        box_height = max_y - min_y
+
+
+        # Resize the mouth cutout to match the mouth box size
+        if mouth_cutout is None or box_width is None or box_height is None:
+            return frame
+        try:
+            resized_mouth_cutout = cv2.resize(mouth_cutout, (box_width, box_height))
+
+            # Extract the region of interest (ROI) from the target frame
+            roi = frame[min_y:max_y, min_x:max_x]
+
+            # Ensure the ROI and resized_mouth_cutout have the same shape
+            if roi.shape != resized_mouth_cutout.shape:
+                resized_mouth_cutout = cv2.resize(resized_mouth_cutout, (roi.shape[1], roi.shape[0]))
+
+            # Apply color transfer from ROI to mouth cutout
+            color_corrected_mouth = self.apply_color_transfer(resized_mouth_cutout, roi)
+
+            # Create a feathered mask with increased feather amount
+            feather_amount = min(30, box_width // 15, box_height // 15)
+            mask = self.create_feathered_mask(resized_mouth_cutout.shape, feather_amount)
+
+            # Blend the color-corrected mouth cutout with the ROI using the feathered mask
+            mask = mask[:,:,np.newaxis]  # Add channel dimension to mask
+            blended = (color_corrected_mouth * mask + roi * (1 - mask)).astype(np.uint8)
+
+            # Place the blended result back into the frame
+            frame[min_y:max_y, min_x:max_x] = blended
+        except Exception as e:
+            print(f'Error {e}')
+            pass
+
+        return frame
+
+    def apply_color_transfer(self, source, target):
+        """
+        Apply color transfer from target to source image
+        """
+        source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
+        target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")
+
+        source_mean, source_std = cv2.meanStdDev(source)
+        target_mean, target_std = cv2.meanStdDev(target)
+
+        # Reshape mean and std to be broadcastable
+        source_mean = source_mean.reshape(1, 1, 3)
+        source_std = source_std.reshape(1, 1, 3)
+        target_mean = target_mean.reshape(1, 1, 3)
+        target_std = target_std.reshape(1, 1, 3)
+
+        # Perform the color transfer
+        source = (source - source_mean) * (target_std / source_std) + target_mean
+        return cv2.cvtColor(np.clip(source, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR)
+
+
 
     def unload_models():
@@ -699,4 +891,8 @@ class ProcessMgr():
         for p in self.processors:
             p.Release()
         self.processors.clear()
+        if self.videowriter is not None:
+            self.videowriter.close()
+        if self.streamwriter is not None:
+            self.streamwriter.Close()
 
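
The implode/explode pair added above is the core of the new pixel-boost path: the aligned crop is split into polyphase subsamples (every Nth pixel at each of the NxN row/column offsets), each 128x128 slice is swapped at the model's native resolution, and the slices are interleaved back into the full-size crop. A minimal, self-contained sketch of the reshape round-trip (numpy only, identity "swap"; sizes taken from the diff above):

import numpy as np

def implode_pixel_boost(aligned, model_size, total):
    # Slice (a, b) holds every total-th pixel starting at row offset a
    # and column offset b, i.e. a polyphase subsampling of the image.
    sub = aligned.reshape(model_size, total, model_size, total, 3)
    return sub.transpose(1, 3, 0, 2, 4).reshape(total ** 2, model_size, model_size, 3)

def explode_pixel_boost(slices, model_size, total, boost_size):
    # Exact inverse: interleave the slices back to full resolution.
    out = np.stack(slices, axis=0).reshape(total, total, model_size, model_size, 3)
    return out.transpose(2, 0, 3, 1, 4).reshape(boost_size, boost_size, 3)

img = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
slices = implode_pixel_boost(img, 128, 4)                 # 16 slices of 128x128
restored = explode_pixel_boost(list(slices), 128, 4, 512)
assert np.array_equal(img, restored)                      # round-trip is lossless

Because the two transposes are exact inverses, any quality gain comes purely from the swapper seeing each slice at its full 128x128 working resolution.
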
roop/ProcessOptions.py CHANGED
@@ -1,6 +1,6 @@
 class ProcessOptions:
 
-    def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, show_face_area, show_mask=False):
+    def __init__(self, processordefines:dict, face_distance, blend_ratio, swap_mode, selected_index, masking_text, imagemask, num_steps, subsample_size, show_face_area, restore_original_mouth, show_mask=False):
         self.processors = processordefines
         self.face_distance_threshold = face_distance
         self.blend_ratio = blend_ratio
@@ -10,4 +10,7 @@ class ProcessOptions:
         self.imagemask = imagemask
         self.num_swap_steps = num_steps
         self.show_face_area_overlay = show_face_area
         self.show_face_masking = show_mask
+        self.subsample_size = subsample_size
+        self.restore_original_mouth = restore_original_mouth
+        self.max_num_reuse_frame = 15
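
For reference, a hypothetical call matching the new constructor; the values below are illustrative, only the parameter names and their order come from the diff:

from roop.ProcessOptions import ProcessOptions

options = ProcessOptions(processordefines={'faceswap': None},   # illustrative plugin dict
                         face_distance=0.65, blend_ratio=0.5,
                         swap_mode='all', selected_index=0,
                         masking_text=None, imagemask=None,
                         num_steps=1, subsample_size=512,
                         show_face_area=False, restore_original_mouth=False)
# max_num_reuse_frame is not a parameter; it is fixed to 15 in __init__.
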
roop/StreamWriter.py ADDED
@@ -0,0 +1,60 @@
+import threading
+import time
+import pyvirtualcam
+
+
+class StreamWriter():
+    FPS = 30
+    VCam = None
+    Active = False
+    THREAD_LOCK_STREAM = threading.Lock()
+    time_last_process = None
+    timespan_min = 0.0
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.Close()
+
+    def __init__(self, size, fps):
+        self.time_last_process = time.perf_counter()
+        self.FPS = fps
+        self.timespan_min = 1.0 / fps
+        print('Detecting virtual cam devices')
+        self.VCam = pyvirtualcam.Camera(width=size[0], height=size[1], fps=fps, fmt=pyvirtualcam.PixelFormat.BGR, print_fps=False)
+        if self.VCam is None:
+            print("No virtual camera found!")
+            return
+        print(f'Using virtual camera: {self.VCam.device}')
+        print(f'Using {self.VCam.native_fmt}')
+        self.Active = True
+
+
+    def LimitFrames(self):
+        while True:
+            current_time = time.perf_counter()
+            time_passed = current_time - self.time_last_process
+            if time_passed >= self.timespan_min:
+                break
+
+    # First version used a queue and threading. Surprisingly this
+    # totally simple, blocking version is 10 times faster!
+    def WriteToStream(self, frame):
+        if self.VCam is None:
+            return
+        with self.THREAD_LOCK_STREAM:
+            self.LimitFrames()
+            self.VCam.send(frame)
+            self.time_last_process = time.perf_counter()
+
+
+    def Close(self):
+        self.Active = False
+        if self.VCam is not None:
+            self.VCam.close()
+        self.VCam = None
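
Because the class implements __enter__/__exit__, it can also be used as a context manager. A minimal usage sketch (assumes pyvirtualcam plus an installed virtual-camera backend such as OBS; the frame content is a placeholder):

import numpy as np
from roop.StreamWriter import StreamWriter

frame = np.zeros((480, 640, 3), dtype=np.uint8)   # BGR, height x width
with StreamWriter((640, 480), 30) as sw:          # size is (width, height)
    for _ in range(90):                           # roughly 3 seconds at 30 FPS
        sw.WriteToStream(frame)                   # blocks to honor the FPS cap

Note that LimitFrames spins in a tight loop rather than sleeping, trading CPU for latency; the in-code comment explains why the blocking design was kept.
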
roop/capturer.py CHANGED
@@ -4,6 +4,10 @@ import numpy as np
 
 from roop.typing import Frame
 
+current_video_path = None
+current_frame_total = 0
+current_capture = None
+
 def get_image_frame(filename: str):
     try:
         return cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
@@ -13,15 +17,27 @@ def get_image_frame(filename: str):
 
 
 def get_video_frame(video_path: str, frame_number: int = 0) -> Optional[Frame]:
-    capture = cv2.VideoCapture(video_path)
-    frame_total = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1))
-    has_frame, frame = capture.read()
-    capture.release()
+    global current_video_path, current_capture, current_frame_total
+
+    if video_path != current_video_path:
+        release_video()
+        current_capture = cv2.VideoCapture(video_path)
+        current_video_path = video_path
+        current_frame_total = current_capture.get(cv2.CAP_PROP_FRAME_COUNT)
+
+    current_capture.set(cv2.CAP_PROP_POS_FRAMES, min(current_frame_total, frame_number - 1))
+    has_frame, frame = current_capture.read()
     if has_frame:
         return frame
     return None
 
+def release_video():
+    global current_capture
+
+    if current_capture is not None:
+        current_capture.release()
+        current_capture = None
+
 
 def get_video_frame_total(video_path: str) -> int:
     capture = cv2.VideoCapture(video_path)
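
get_video_frame now caches one open cv2.VideoCapture per path, so repeated preview seeks on the same file avoid reopening it; the caller releases the handle explicitly. A short usage sketch (the file name is hypothetical):

from roop.capturer import get_video_frame, release_video

# Scrubbing through the same file reuses the cached capture handle.
for n in (1, 50, 100):
    frame = get_video_frame('example.mp4', n)
release_video()  # free the cached handle when done
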
roop/core.py CHANGED
@@ -14,6 +14,7 @@ import signal
 import torch
 import onnxruntime
 import pathlib
+import argparse
 
 from time import time
 
@@ -27,7 +28,7 @@ from roop.face_util import extract_face_images
 from roop.ProcessEntry import ProcessEntry
 from roop.ProcessMgr import ProcessMgr
 from roop.ProcessOptions import ProcessOptions
-from roop.capturer import get_video_frame_total
+from roop.capturer import get_video_frame_total, release_video
 
 
 clip_text = None
@@ -47,9 +48,12 @@ warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')
 def parse_args() -> None:
     signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())
     roop.globals.headless = False
+
+    program = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=100))
+    program.add_argument('--server_share', help='Public server', dest='server_share', action='store_true', default=False)
+    program.add_argument('--cuda_device_id', help='Index of the cuda gpu to use', dest='cuda_device_id', type=int, default=0)
+    roop.globals.startup_args = program.parse_args()
     # Always enable all processors when using GUI
-    if len(sys.argv) > 1:
-        print('No CLI args supported - use Settings Tab instead')
     roop.globals.frame_processors = ['face_swapper', 'face_enhancer']
 
 
@@ -58,8 +62,20 @@ def encode_execution_providers(execution_providers: List[str]) -> List[str]:
 
 
 def decode_execution_providers(execution_providers: List[str]) -> List[str]:
-    return [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
+    list_providers = [provider for provider, encoded_execution_provider in zip(onnxruntime.get_available_providers(), encode_execution_providers(onnxruntime.get_available_providers()))
             if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers)]
+
+    try:
+        for i in range(len(list_providers)):
+            if list_providers[i] == 'CUDAExecutionProvider':
+                list_providers[i] = ('CUDAExecutionProvider', {'device_id': roop.globals.cuda_device_id})
+                torch.cuda.set_device(roop.globals.cuda_device_id)
+                break
+    except:
+        pass
+
+    return list_providers
+
 
 
 def suggest_max_memory() -> int:
@@ -204,7 +220,7 @@ def live_swap(frame, options):
     return newframe
 
 
-def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, num_swap_steps, progress, selected_index = 0) -> None:
+def batch_process_regular(output_method, files:list[ProcessEntry], masking_engine:str, new_clip_text:str, use_new_method, imagemask, restore_original_mouth, num_swap_steps, progress, selected_index = 0) -> None:
     global clip_text, process_mgr
 
     release_resources()
@@ -214,9 +230,11 @@ def batch_process_regular(files:list[ProcessEntry], masking_engine:str, new_clip
     mask = imagemask["layers"][0] if imagemask is not None else None
     if len(roop.globals.INPUT_FACESETS) <= selected_index:
         selected_index = 0
-    options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio, roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps, False)
+    options = ProcessOptions(get_processing_plugins(masking_engine), roop.globals.distance_threshold, roop.globals.blend_ratio,
+                              roop.globals.face_swap_mode, selected_index, new_clip_text, mask, num_swap_steps,
+                              roop.globals.subsample_size, False, restore_original_mouth)
     process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
-    batch_process(files, use_new_method)
+    batch_process(output_method, files, use_new_method)
     return
 
 def batch_process_with_options(files:list[ProcessEntry], options, progress):
@@ -230,11 +248,11 @@ def batch_process_with_options(files:list[ProcessEntry], options, progress):
     roop.globals.keep_frames = False
     roop.globals.wait_after_extraction = False
     roop.globals.skip_audio = False
-    batch_process(files, True)
+    batch_process("Files", files, True)
 
 
 
-def batch_process(files:list[ProcessEntry], use_new_method) -> None:
+def batch_process(output_method, files:list[ProcessEntry], use_new_method) -> None:
     global clip_text, process_mgr
 
     roop.globals.processing = True
@@ -287,9 +305,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
             if v.endframe == 0:
                 v.endframe = get_video_frame_total(v.filename)
 
-            update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
+            is_streaming_only = output_method == "Virtual Camera"
+            if is_streaming_only == False:
+                update_status(f'Creating {os.path.basename(v.finalname)} with {fps} FPS...')
+
             start_processing = time()
-            if roop.globals.keep_frames or not use_new_method:
+            if is_streaming_only == False and roop.globals.keep_frames or not use_new_method:
                 util.create_temp(v.filename)
                 update_status('Extracting frames...')
                 ffmpeg.extract_frames(v.filename,v.startframe,v.endframe, fps)
@@ -317,7 +338,7 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
                     skip_audio = True
                 else:
                     skip_audio = roop.globals.skip_audio
-                process_mgr.run_batch_inmem(v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads, skip_audio)
+                process_mgr.run_batch_inmem(output_method, v.filename, v.finalname, v.startframe, v.endframe, fps,roop.globals.execution_threads)
 
             if not roop.globals.processing:
                 end_processing('Processing stopped!')
@@ -346,10 +367,12 @@ def batch_process(files:list[ProcessEntry], use_new_method) -> None:
                     os.remove(video_file_name)
                 else:
                     shutil.move(video_file_name, destination)
-                update_status(f'\nProcessing {os.path.basename(destination)} took {time() - start_processing} secs')
 
-            else:
+            elif is_streaming_only == False:
                 update_status(f'Failed processing {os.path.basename(v.finalname)}!')
+            elapsed_time = time() - start_processing
+            average_fps = (v.endframe - v.startframe) / elapsed_time
+            update_status(f'\nProcessing {os.path.basename(destination)} took {elapsed_time:.2f} secs, {average_fps:.2f} frames/s')
     end_processing('Finished')
 
 
@@ -371,8 +394,11 @@ def run() -> None:
     if not pre_check():
         return
     roop.globals.CFG = Settings('config.yaml')
+    roop.globals.cuda_device_id = roop.globals.startup_args.cuda_device_id
    roop.globals.execution_threads = roop.globals.CFG.max_threads
    roop.globals.video_encoder = roop.globals.CFG.output_video_codec
    roop.globals.video_quality = roop.globals.CFG.video_quality
    roop.globals.max_memory = roop.globals.CFG.memory_limit if roop.globals.CFG.memory_limit > 0 else None
+    if roop.globals.startup_args.server_share:
+        roop.globals.CFG.server_share = True
     main.run()
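
The provider rewrite in decode_execution_providers relies on ONNX Runtime's tuple form, where a provider name can be paired with an options dict. A standalone sketch of the same mechanism (the model path is hypothetical):

import onnxruntime

providers = [('CUDAExecutionProvider', {'device_id': 1}),   # pin to the second GPU
             'CPUExecutionProvider']                        # plain fallback
session = onnxruntime.InferenceSession('model.onnx', providers=providers)

Pinning both onnxruntime (via device_id) and torch (via torch.cuda.set_device) keeps the two runtimes on the same GPU, which is why the diff sets both in one place.
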
roop/face_util.py CHANGED
@@ -9,18 +9,18 @@ import cv2
 import numpy as np
 from skimage import transform as trans
 from roop.capturer import get_video_frame
-from roop.utilities import resolve_relative_path, conditional_download
+from roop.utilities import resolve_relative_path, conditional_thread_semaphore
 
 FACE_ANALYSER = None
-THREAD_LOCK_ANALYSER = threading.Lock()
-THREAD_LOCK_SWAPPER = threading.Lock()
+#THREAD_LOCK_ANALYSER = threading.Lock()
+#THREAD_LOCK_SWAPPER = threading.Lock()
 FACE_SWAPPER = None
 
 
 def get_face_analyser() -> Any:
     global FACE_ANALYSER
 
-    with THREAD_LOCK_ANALYSER:
+    with conditional_thread_semaphore():
         if FACE_ANALYSER is None or roop.globals.g_current_face_analysis != roop.globals.g_desired_face_analysis:
             model_path = resolve_relative_path('..')
             # removed genderage
@@ -210,15 +210,18 @@ arcface_dst = np.array(
 )
 
 
-def estimate_norm(lmk, image_size=112, mode="arcface"):
+def estimate_norm(lmk, image_size=112):
     assert lmk.shape == (5, 2)
-    assert image_size % 112 == 0 or image_size % 128 == 0
     if image_size % 112 == 0:
         ratio = float(image_size) / 112.0
         diff_x = 0
-    else:
+    elif image_size % 128 == 0:
         ratio = float(image_size) / 128.0
         diff_x = 8.0 * ratio
+    elif image_size % 512 == 0:
+        ratio = float(image_size) / 512.0
+        diff_x = 32.0 * ratio
+
     dst = arcface_dst * ratio
     dst[:, 0] += diff_x
     tform = trans.SimilarityTransform()
@@ -230,7 +233,7 @@ def estimate_norm(lmk, image_size=112, mode="arcface"):
 
 # aligned, M = norm_crop2(f[1], face.kps, 512)
 def align_crop(img, landmark, image_size=112, mode="arcface"):
-    M = estimate_norm(landmark, image_size, mode)
+    M = estimate_norm(landmark, image_size)
     warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
     return warped, M
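
estimate_norm scales the 112x112 ArcFace landmark template up to the requested crop size. The values it picks follow directly from the code above; note that 512 is divisible by 128, so the new % 512 branch only fires for sizes that are multiples of 512 but not of 112 or 128:

# Scaling chosen by estimate_norm for common crop sizes
# (ratio scales the ArcFace template, diff_x shifts it in x).
for image_size in (112, 128, 256, 512):
    if image_size % 112 == 0:
        ratio, diff_x = image_size / 112.0, 0.0
    elif image_size % 128 == 0:
        ratio, diff_x = image_size / 128.0, 8.0 * (image_size / 128.0)
    print(image_size, ratio, diff_x)
# 112 -> 1.0, 0.0   128 -> 1.0, 8.0   256 -> 2.0, 16.0   512 -> 4.0, 32.0
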
roop/globals.py CHANGED
@@ -5,7 +5,9 @@ source_path = None
 target_path = None
 output_path = None
 target_folder_path = None
+startup_args = None
 
+cuda_device_id = 0
 frame_processors: List[str] = []
 keep_fps = None
 keep_frames = None
@@ -26,6 +28,7 @@ execution_threads = None
 headless = None
 log_level = 'error'
 selected_enhancer = None
+subsample_size = 128
 face_swap_mode = None
 blend_ratio = 0.5
 distance_threshold = 0.65
roop/metadata.py CHANGED
@@ -1,2 +1,2 @@
 name = 'roop unleashed'
-version = '4.0.0'
+version = '4.3.3'
roop/util_ffmpeg.py CHANGED
@@ -73,12 +73,32 @@ def create_video(target_path: str, dest_filename: str, fps: float = 24.0, temp_d
 
 
 def create_gif_from_video(video_path: str, gif_path):
-    from roop.capturer import get_video_frame
+    from roop.capturer import get_video_frame, release_video
 
     fps = util.detect_fps(video_path)
     frame = get_video_frame(video_path)
+    release_video()
 
-    run_ffmpeg(['-i', video_path, '-vf', f'fps={fps},scale={frame.shape[0]}:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse', '-loop', '0', gif_path])
+    scalex = frame.shape[0]
+    scaley = frame.shape[1]
+
+    if scalex >= scaley:
+        scaley = -1
+    else:
+        scalex = -1
+
+    run_ffmpeg(['-i', video_path, '-vf', f'fps={fps},scale={int(scalex)}:{int(scaley)}:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse', '-loop', '0', gif_path])
+
+
+
+def create_video_from_gif(gif_path: str, output_path):
+    fps = util.detect_fps(gif_path)
+    filter = """scale='trunc(in_w/2)*2':'trunc(in_h/2)*2',format=yuv420p,fps=10"""
+    run_ffmpeg(['-i', gif_path, '-vf', f'"{filter}"', '-movflags', '+faststart', '-shortest', output_path])
+
+
+def repair_video(original_video: str, final_video : str):
+    run_ffmpeg(['-i', original_video, '-movflags', 'faststart', '-acodec', 'copy', '-vcodec', 'copy', final_video])
 
 
 def restore_audio(intermediate_video: str, original_video: str, trim_frame_start, trim_frame_end, final_video : str) -> None:
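
ffmpeg's scale filter treats -1 as "derive this side from the aspect ratio", which is what the new scalex/scaley logic uses to keep GIFs undistorted. A standalone sketch of the same decision; note that OpenCV frames are indexed (height, width, channels), so frame.shape[0] is the height:

def gif_scale_args(height, width):
    # Keep the larger side fixed, let ffmpeg derive the other (-1).
    scalex, scaley = height, width   # mirrors frame.shape[0], frame.shape[1]
    if scalex >= scaley:
        scaley = -1
    else:
        scalex = -1
    return f'scale={scalex}:{scaley}:flags=lanczos'

print(gif_scale_args(720, 1280))   # -> scale=-1:1280:flags=lanczos
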
roop/utilities.py CHANGED
@@ -13,6 +13,11 @@ import tempfile
 import cv2
 import zipfile
 import traceback
+import threading
+import threading
+
+from typing import Union, Any
+from contextlib import nullcontext
 
 from pathlib import Path
 from typing import List, Any
@@ -26,6 +31,10 @@ import roop.globals
 TEMP_FILE = "temp.mp4"
 TEMP_DIRECTORY = "temp"
 
+THREAD_SEMAPHORE = threading.Semaphore()
+NULL_CONTEXT = nullcontext()
+
+
 # monkey patch ssl for mac
 if platform.system().lower() == "darwin":
     ssl._create_default_https_context = ssl._create_unverified_context
@@ -173,6 +182,8 @@ def has_extension(filepath: str, extensions: List[str]) -> bool:
 
 def is_image(image_path: str) -> bool:
     if image_path and os.path.isfile(image_path):
+        if image_path.endswith(".webp"):
+            return True
         mimetype, _ = mimetypes.guess_type(image_path)
         return bool(mimetype and mimetype.startswith("image/"))
     return False
@@ -337,3 +348,31 @@ gradio: {gradio.__version__}
 
 def compute_cosine_distance(emb1, emb2) -> float:
     return distance.cosine(emb1, emb2)
+
+def has_cuda_device():
+    return torch.cuda is not None and torch.cuda.is_available()
+
+
+def print_cuda_info():
+    try:
+        print(f'Number of CUDA devices: {torch.cuda.device_count()} Currently used Id: {torch.cuda.current_device()} Device Name: {torch.cuda.get_device_name(torch.cuda.current_device())}')
+    except:
+        print('No CUDA device found!')
+
+def clean_dir(path: str):
+    contents = os.listdir(path)
+    for item in contents:
+        item_path = os.path.join(path, item)
+        try:
+            if os.path.isfile(item_path):
+                os.remove(item_path)
+            elif os.path.isdir(item_path):
+                shutil.rmtree(item_path)
+        except Exception as e:
+            print(e)
+
+
+def conditional_thread_semaphore() -> Union[Any, Any]:
+    if 'DmlExecutionProvider' in roop.globals.execution_providers or 'ROCMExecutionProvider' in roop.globals.execution_providers:
+        return THREAD_SEMAPHORE
+    return NULL_CONTEXT
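
Both threading.Semaphore and contextlib.nullcontext support the with protocol, so callers can guard model access unconditionally and only pay for the lock on providers that are not thread-safe (DirectML/ROCm here). A self-contained sketch of the pattern:

import threading
from contextlib import nullcontext

THREAD_SEMAPHORE = threading.Semaphore()
NULL_CONTEXT = nullcontext()

def conditional_thread_semaphore(serialize: bool):
    # Same shape as the roop helper: a real lock when the execution
    # provider needs serialized access, a no-op context otherwise.
    return THREAD_SEMAPHORE if serialize else NULL_CONTEXT

with conditional_thread_semaphore(serialize=True):
    pass  # model inference would go here
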
roop/virtualcam.py CHANGED
@@ -10,7 +10,7 @@ cam_active = False
 cam_thread = None
 vcam = None
 
-def virtualcamera(streamobs, cam_num,width,height):
+def virtualcamera(streamobs, use_xseg, use_mouthrestore, cam_num,width,height):
     from roop.ProcessOptions import ProcessOptions
     from roop.core import live_swap, get_processing_plugins
 
@@ -44,10 +44,11 @@ def virtualcamera(streamobs, cam_num,width,height):
         print(f'Using {cam.native_fmt}')
     else:
         print(f'Not streaming to virtual camera!')
+    subsample_size = roop.globals.subsample_size
 
-    # always use xseg masking
-    options = ProcessOptions(get_processing_plugins("mask_xseg"), roop.globals.distance_threshold, roop.globals.blend_ratio,
-                              "all", 0, None, None, 1, False)
+
+    options = ProcessOptions(get_processing_plugins("mask_xseg" if use_xseg else None), roop.globals.distance_threshold, roop.globals.blend_ratio,
+                              "all", 0, None, None, 1, subsample_size, False, use_mouthrestore)
     while cam_active:
         ret, frame = cap.read()
         if not ret:
@@ -67,12 +68,12 @@ def virtualcamera(streamobs, cam_num,width,height):
 
 
 
-def start_virtual_cam(streamobs, cam_number, resolution):
+def start_virtual_cam(streamobs, use_xseg, use_mouthrestore, cam_number, resolution):
     global cam_thread, cam_active
 
     if not cam_active:
         width, height = map(int, resolution.split('x'))
-        cam_thread = threading.Thread(target=virtualcamera, args=[streamobs, cam_number, width, height])
+        cam_thread = threading.Thread(target=virtualcamera, args=[streamobs, use_xseg, use_mouthrestore, cam_number, width, height])
         cam_thread.start()
 
 
@@ -83,5 +84,5 @@ def stop_virtual_cam():
     if cam_active:
         cam_active = False
         cam_thread.join()
 
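
With the two new flags threaded through, a caller (for instance the live-cam tab) starts and stops the camera as below; the argument values are illustrative:

from roop.virtualcam import start_virtual_cam, stop_virtual_cam

# streamobs=True streams to the OBS virtual camera; xseg masking and
# mouth restoration enabled; read capture device 0 at 1280x720.
start_virtual_cam(True, True, True, 0, '1280x720')
# ... frames are produced by the worker thread until ...
stop_virtual_cam()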