Files changed (2)
  1. app.py +43 -10
  2. requirements.txt +3 -1
app.py CHANGED
@@ -5,6 +5,11 @@ import spaces
 from transformers import AutoModelForImageSegmentation
 import torch
 from torchvision import transforms
+import moviepy.editor as mp
+from pydub import AudioSegment
+from PIL import Image
+import numpy as np
+
 
 torch.set_float32_matmul_precision(["high", "highest"][0])
 
@@ -20,13 +25,33 @@ transform_image = transforms.Compose(
     ]
 )
 
+
 def fn(vid):
-    # TODO
-    # loop over video and extract images and process each one
-    im = load_img(vid, output_type="pil")
-    im = im.convert("RGB")
-    image = process(im)
-    return image
+    # Load the video using moviepy
+    video = mp.VideoFileClip(vid)
+
+    # Extract audio from the video
+    audio = video.audio
+
+    # Extract frames at 12 fps
+    frames = video.iter_frames(fps=12)
+
+    # Process each frame for background removal
+    processed_frames = []
+    for frame in frames:
+        pil_image = Image.fromarray(frame)
+        processed_image = process(pil_image)
+        processed_frames.append(np.array(processed_image))
+
+    # Create a new video from the processed frames
+    processed_video = mp.ImageSequenceClip(processed_frames, fps=12)
+
+    # Add the original audio back to the processed video
+    processed_video = processed_video.set_audio(audio)
+
+    # Return the processed video
+    return processed_video
+
 
 @spaces.GPU
 def process(image):
@@ -38,24 +63,32 @@ def process(image):
     pred = preds[0].squeeze()
     pred_pil = transforms.ToPILImage()(pred)
     mask = pred_pil.resize(image_size)
-    image.putalpha(mask)
+
+    # Create a green screen image
+    green_screen = Image.new("RGBA", image_size, (0, 255, 0, 255))
+
+    # Composite the image onto the green screen using the mask
+    image = Image.composite(image, green_screen, mask)
+
     return image
-
+
+
 def process_file(f):
-    name_path = f.rsplit(".",1)[0]+".png"
+    name_path = f.rsplit(".", 1)[0] + ".png"
     im = load_img(f, output_type="pil")
     im = im.convert("RGB")
     transparent = process(im)
     transparent.save(name_path)
     return name_path
 
+
 in_video = gr.Video(label="birefnet")
 out_video = gr.Video()
 
 
 url = "https://hips.hearstapps.com/hmg-prod/images/gettyimages-1229892983-square.jpg"
 demo = gr.Interface(
-    fn, inputs=in_video, outputs=out_video, api_name="image"
+    fn, inputs=in_video, outputs=out_video, api_name="video"
 )
 
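Two notes on the new code: Gradio's gr.Video output component expects a file path rather than a moviepy clip object, so returning processed_video directly from fn will not render; and PIL's Image.composite requires both inputs to share a mode, while process receives RGB frames but builds an RGBA green screen (an RGB green screen, Image.new("RGB", image_size, (0, 255, 0)), would avoid the mismatch and keep frames ready for ImageSequenceClip). A minimal sketch of fn with the file-output fix applied; the temp-file naming and codec choices are assumptions, not part of this PR:

import tempfile

import moviepy.editor as mp
import numpy as np
from PIL import Image


def fn(vid):
    video = mp.VideoFileClip(vid)
    audio = video.audio

    # Run every frame (sampled at 12 fps) through the segmentation pipeline
    processed_frames = []
    for frame in video.iter_frames(fps=12):
        composited = process(Image.fromarray(frame))  # process() as defined in app.py
        # ImageSequenceClip expects plain RGB arrays, so drop any alpha channel
        processed_frames.append(np.array(composited.convert("RGB")))

    processed_video = mp.ImageSequenceClip(processed_frames, fps=12)
    if audio is not None:
        processed_video = processed_video.set_audio(audio)

    # gr.Video plays a file from disk, so render the clip and return its path
    out_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
    processed_video.write_videofile(out_path, codec="libx264", audio_codec="aac")
    return out_path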
requirements.txt CHANGED
@@ -13,4 +13,6 @@ huggingface_hub
 transformers>=4.39.1
 gradio
 gradio_imageslider
-loadimg>=0.1.1
+loadimg>=0.1.1
+moviepy
+pydub
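Note: import moviepy.editor only exists on moviepy 1.x (the moviepy.editor namespace was removed in moviepy 2.0), so pinning moviepy<2.0 here may be safer. Also, pydub's AudioSegment is imported in app.py but never used, so the pydub dependency can likely be dropped unless follow-up work needs it.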