File size: 9,269 Bytes
dffc412
e44b262
 
1d2debd
0af8cd4
e44b262
adc6785
9d3c2b7
 
adc6785
9d3c2b7
 
 
 
adc6785
24472b6
 
9d3c2b7
 
adc6785
9d3c2b7
 
 
 
dffc412
 
9d3c2b7
 
24472b6
 
 
 
9d3c2b7
 
 
24472b6
 
 
 
 
 
 
9d3c2b7
 
 
 
2287193
7b1a1a0
0838eb3
9d3c2b7
2287193
 
 
 
 
 
9d3c2b7
 
 
adc6785
24472b6
 
9d3c2b7
24472b6
 
 
 
9d3c2b7
 
 
0838eb3
 
 
 
 
 
 
 
 
9d3c2b7
24472b6
7b1a1a0
24472b6
 
 
0838eb3
 
24472b6
 
 
d63392b
9d3c2b7
24472b6
207e8d9
9d3c2b7
2287193
9d3c2b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e039b6b
9d3c2b7
 
2287193
d6c2b30
 
 
 
 
 
 
 
dffc412
 
 
 
 
 
 
 
 
0838eb3
 
 
 
 
 
d6c2b30
7b1a1a0
2287193
 
 
 
 
dffc412
2287193
dffc412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0838eb3
dffc412
 
 
 
 
2287193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adc6785
 
9d3c2b7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Corrected version (kept here so it does not get lost).
# Runtime dependency bootstrap: these packages are installed when the module is
# first executed, before the heavy imports below.
import os
import subprocess
import sys

# Extend the current environment instead of replacing it: passing a bare dict
# as ``env`` drops PATH, HOME and every other inherited variable for the child.
# ``sys.executable -m pip`` installs into the interpreter that is running this
# script, and the argument-list form avoids going through a shell.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', 'flash-attn', '--no-build-isolation'],
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
)
# subprocess.run('pip install bitsandbytes', shell=True)
subprocess.run([sys.executable, '-m', 'pip', 'install', 'av==12.0.0'])

import gradio as gr
import spaces
#import gradio.helpers
import torch
import os
from glob import glob
from pathlib import Path
from typing import Optional

# from diffusers import StableVideoDiffusionPipeline
from kandinsky import get_T2V_pipeline
from diffusers.utils import load_image, export_to_video
from PIL import Image

import uuid
import random
from huggingface_hub import hf_hub_download

from src.gigachat import giga_generate

#gradio.helpers.CACHED_FOLDER = '/data/cache'

# pipe = StableVideoDiffusionPipeline.from_pretrained(
#     "multimodalart/stable-video-diffusion", torch_dtype=torch.float16, variant="fp16"
# )
# pipe.to("cuda")
#pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
#pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)

# Every pipeline component named below is mapped to the CUDA device; the
# mapping is handed to get_T2V_pipeline to build the module-level pipeline.
device_map = {
    component: torch.device('cuda')
    for component in ("dit", "vae", "text_embedder")
}
pipe = get_T2V_pipeline(device_map)

# Largest signed 64-bit integer (2**63 - 1). In this file it is only referenced
# by the commented-out seed-randomisation code.
max_64_bit_int = (1 << 63) - 1

@spaces.GPU(duration=120)
def sample(
    # image: Image,
    prompt,
    resolution,
    seed: Optional[int] = 42,
    # randomize_seed: bool = True,
    # motion_bucket_id: int = 127,
    # fps_id: int = 6,
    # version: str = "svd_xt",
    # cond_aug: float = 0.02,
    # decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
    device: str = "cuda",
    output_folder: str = "outputs",
    progress=gr.Progress(track_tqdm=True)
):
    """Generate a video for ``prompt`` and return the path it was saved to.

    The module-level ``pipe`` is called with ``save_path`` set to a fresh file
    inside ``output_folder``; the pipeline is expected to write the video
    there (the explicit ``export_to_video`` call is commented out).

    Args:
        prompt: Text description forwarded to the pipeline as ``text``.
        resolution: Aspect-ratio key, one of ``'1:1'``, ``'9:16'``, ``'16:9'``,
            ``'1:2'`` or ``'2:1'``; selects the output width and height.
        seed: Forwarded to the pipeline as ``seed``. When not ``None`` it is
            also used to seed torch's global RNG after generation.
        device: Currently unused; kept for interface compatibility.
        output_folder: Directory for generated videos; created if missing.
        progress: Gradio progress tracker (``track_tqdm=True``).

    Returns:
        Path of the ``.mp4`` file passed to the pipeline as ``save_path``.

    Raises:
        KeyError: If ``resolution`` is not one of the supported keys.
    """
    # if image.mode == "RGBA":
    #     image = image.convert("RGB")

    # if(randomize_seed):
    #     seed = random.randint(0, max_64_bit_int)
    # generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    # The running count alone is not unique: concurrent requests, or a deleted
    # file, would produce a name that already exists and overwrite that video.
    # A random suffix keeps the sortable prefix while making the name unique.
    video_path = os.path.join(
        output_folder, f"{base_count:06d}_{uuid.uuid4().hex[:8]}.mp4"
    )

    # Aspect-ratio key -> (width, height) in pixels.
    res_variants = {
        '1:1': (512, 512),
        '9:16': (384, 672),
        '16:9': (672, 384),
        '1:2': (352, 736),
        '2:1': (736, 352),
    }
    width, height = res_variants[resolution]

    # frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
    # prompt = "The camera follows behind a white vintage SUV with a black roof rack as it speeds up a steep dirt road surrounded by pine trees on a steep mountain slope, dust kicks up from it’s tires, the sunlight shines on the SUV as it speeds along the dirt road, casting a warm glow over the scene. The dirt road curves gently into the distance, with no other cars or vehicles in sight. The trees on either side of the road are redwoods, with patches of greenery scattered throughout. The car is seen from the rear following the curve with ease, making it seem as if it is on a rugged drive through the rugged terrain. The dirt road itself is surrounded by steep hills and mountains, with a clear blue sky above with wispy clouds."
    # The return value is not needed here; only the file at save_path is used.
    pipe(
        seed=seed,
        time_length=12,
        width=width,
        height=height,
        save_path=video_path,
        text=prompt,
    )
    # export_to_video(frames, video_path, fps=8)

    # NOTE(review): seeding after generation only affects later torch RNG use;
    # confirm whether this was meant to run before the pipeline call.
    if seed is not None:
        torch.manual_seed(seed)

    return video_path

def resize_image(image, output_size=(672, 384)):
    """Scale ``image`` to cover ``output_size`` and centre-crop it to that size.

    The image is resized (LANCZOS) with its aspect ratio preserved so that it
    fully covers the target box, then the overflow is cropped equally from
    both sides of the longer axis.

    Args:
        image: Object exposing ``width``, ``height``, ``resize`` and ``crop``
            (a PIL image in this file).
        output_size: Target ``(width, height)`` in pixels.

    Returns:
        The cropped image returned by ``crop``.
    """
    out_width, out_height = output_size

    # Aspect ratios of the desired size and of the original image.
    target_aspect = out_width / out_height
    image_aspect = image.width / image.height

    if image_aspect > target_aspect:
        # Source is wider than the target: match the height, crop the sides.
        new_height = out_height
        # max() guards against int() truncation leaving the resized image
        # narrower than the crop window.
        new_width = max(out_width, int(new_height * image_aspect))
    else:
        # Source is taller (or equal): match the width, crop top and bottom.
        new_width = out_width
        new_height = max(out_height, int(new_width / image_aspect))

    resized_image = image.resize((new_width, new_height), Image.LANCZOS)

    # Derive right/bottom from the rounded left/top so the crop is exactly
    # output_size. Passing two independently computed float midpoints lets
    # each edge be rounded on its own, which can yield a crop that is one
    # pixel off for odd target sizes.
    left = round((new_width - out_width) / 2)
    top = round((new_height - out_height) / 2)
    return resized_image.crop((left, top, left + out_width, top + out_height))

# Gradio UI: a video output, a prompt box with an optional "enhance" button,
# an aspect-ratio selector and a "Generate" button wired to sample().
with gr.Blocks() as demo:
    gr.Markdown('''# Community demo for Kandinsky 4.0''')
    with gr.Row():
        with gr.Column():
            # image = gr.Image(label="Upload your image", type="pil")
            video = gr.Video()
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            # TODO: is a Row needed here, or can the Markdown be used directly?
            with gr.Row():
                # TODO: should we link to GigaChat?
                # TODO: replace the text
                # The previous text linked "GigaChat Model" to the unrelated
                # THUDM/GLM-4 repository; the incorrect link has been removed.
                gr.Markdown(
                    "✨Upon pressing the enhanced prompt button, we will use the GigaChat model to polish the prompt and overwrite the original one."
                )
                enhance_button = gr.Button("✨ Enhance Prompt(Optional)")

            resolution = gr.Dropdown(
                label="Video resolution",
                choices=["1:1", "9:16", "16:9", "1:2", "2:1"],
                value="16:9"
            )

            generate_btn = gr.Button("Generate")

    # with gr.Accordion("Advanced options", open=False):
    #     seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
    #     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
    #     motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
    #     fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)

    # image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)

    # TODO: adapt for GigaChat
    def beautify_prompt(prompt: str, retry_times: int = 3) -> str:
        """Return ``prompt`` rewritten by ``giga_generate``.

        Args:
            prompt: The user's prompt text.
            retry_times: Currently unused; kept for interface compatibility
                with the commented-out retry loop below.

        Returns:
            The result of ``giga_generate(prompt)``, or the original prompt
            when the prompt is empty/blank or the result is empty.
        """
        # Nothing to enhance: skip the model call for an empty or blank prompt.
        if not prompt or not prompt.strip():
            return prompt

        enhanced = giga_generate(prompt)

        # if not os.environ.get("OPENAI_API_KEY"):
        #     return prompt
        # client = OpenAI()
        # text = prompt.strip()

        # for i in range(retry_times):
        #     response = client.chat.completions.create(
        #         messages=[
        #             {"role": "system", "content": sys_prompt},
        #             {
        #                 "role": "user",
        #                 "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "a girl is on the beach"',
        #             },
        #             {
        #                 "role": "assistant",
        #                 "content": "A radiant woman stands on a deserted beach, arms outstretched, wearing a beige trench coat, white blouse, light blue jeans, and chic boots, against a backdrop of soft sky and sea. Moments later, she is seen mid-twirl, arms exuberant, with the lighting suggesting dawn or dusk. Then, she runs along the beach, her attire complemented by an off-white scarf and black ankle boots, the tranquil sea behind her. Finally, she holds a paper airplane, her pose reflecting joy and freedom, with the ocean's gentle waves and the sky's soft pastel hues enhancing the serene ambiance.",
        #             }
        #         ],
        #         model="glm-4-plus",
        #         temperature=0.01,
        #         top_p=0.7,
        #         stream=False,
        #         max_tokens=200,
        #     )
        #     if response.choices:
        #         return response.choices[0].message.content

        # Keep the user's text rather than blanking the textbox when the
        # enhancer returns nothing.
        return enhanced or prompt

    # "Generate" runs sample(prompt, resolution) and shows the returned video.
    generate_btn.click(fn=sample, inputs=[prompt, resolution], outputs=[video], api_name="video")

    # TODO
    def enhance_prompt_func(prompt):
        """Click handler: return the enhanced prompt for the prompt textbox."""
        return beautify_prompt(prompt, retry_times=1)

    # TODO
    # The enhanced text overwrites the contents of the prompt textbox.
    enhance_button.click(enhance_prompt_func, inputs=[prompt], outputs=[prompt])
  # gr.Examples(
  #   examples=[
  #       "images/blink_meme.png",
  #       "images/confused2_meme.png",
  #       "images/disaster_meme.png",
  #       "images/distracted_meme.png",
  #       "images/hide_meme.png",
  #       "images/nazare_meme.png",
  #       "images/success_meme.png",
  #       "images/willy_meme.png",
  #       "images/wink_meme.png"
  #   ],
  #   inputs=image,
  #   outputs=[video, seed],
  #   fn=sample,
  #   cache_examples="lazy",
  # )

if __name__ == "__main__":
    # Request queue configuration is currently disabled:
    # demo.queue(max_size=20, api_open=False)

    # Start the Gradio app only when this file is run as a script.
    demo.launch(
        show_api=False,
        share=True,
    )