Spaces:

ai-forever
/

kandinsky-4-t2v-flash

Running on Zero

App Files Files Community

kandinsky-4-t2v-flash / app.py

ai-forever

add basic function beautify_prompt

dffc412 verified 13 days ago

raw

history blame

9.27 kB

	# Fixed version (kept here so that it does not get lost).
	import os
	import subprocess

	# Runtime dependency bootstrap: these packages are installed with pip when the
	# app starts, before the heavier imports below are executed.
	#
	# `env=` replaces the child's entire environment, so the flag is merged into a
	# copy of the current environment; otherwise PATH, HOME, proxy settings, etc.
	# would be dropped for the pip process.
	subprocess.run(
	    'pip install flash-attn --no-build-isolation',
	    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
	    shell=True,
	)
	# subprocess.run('pip install bitsandbytes', shell=True)
	subprocess.run('pip install av==12.0.0', shell=True)

	import gradio as gr
	import spaces
	#import gradio.helpers
	import torch
	import os
	from glob import glob
	from pathlib import Path
	from typing import Optional

	# from diffusers import StableVideoDiffusionPipeline
	from kandinsky import get_T2V_pipeline
	from diffusers.utils import load_image, export_to_video
	from PIL import Image

	import uuid
	import random
	from huggingface_hub import hf_hub_download

	from src.gigachat import giga_generate

	#gradio.helpers.CACHED_FOLDER = '/data/cache'

	# pipe = StableVideoDiffusionPipeline.from_pretrained(
	# "multimodalart/stable-video-diffusion", torch_dtype=torch.float16, variant="fp16"
	# )
	# pipe.to("cuda")
	#pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
	#pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)

	# Every pipeline component is mapped to the CUDA device.
	device_map = {
	    component: torch.device('cuda')
	    for component in ("dit", "vae", "text_embedder")
	}
	# The pipeline is built once at import time and reused by `sample`.
	pipe = get_T2V_pipeline(device_map)

	# Largest value representable by a signed 64-bit integer.
	max_64_bit_int = (1 << 63) - 1

	@spaces.GPU(duration=120)
	def sample(
	    prompt,
	    resolution,
	    seed: Optional[int] = 42,
	    device: str = "cuda",
	    output_folder: str = "outputs",
	    progress=gr.Progress(track_tqdm=True)
	):
	    """Generate a video for ``prompt`` and return the path of the MP4 file.

	    Args:
	        prompt: Text description, passed to the pipeline as ``text``.
	        resolution: Aspect-ratio key; one of '1:1', '9:16', '16:9', '1:2',
	            '2:1'. It selects the frame width and height.
	        seed: Seed forwarded to the pipeline. When not ``None`` it also seeds
	            torch's global RNG before generation.
	        device: Unused; kept so existing callers keep working.
	        output_folder: Directory for the result; created when missing.
	        progress: Gradio progress tracker (tqdm tracking enabled).

	    Returns:
	        The path handed to the pipeline as ``save_path``; the pipeline is
	        expected to write the video there.

	    Raises:
	        gr.Error: If ``resolution`` is not one of the supported keys.
	    """
	    # Aspect ratio -> (width, height) in pixels.
	    res_variants = {
	        '1:1': (512, 512),
	        '9:16': (384, 672),
	        '16:9': (672, 384),
	        '1:2': (352, 736),
	        '2:1': (736, 352),
	    }
	    try:
	        width, height = res_variants[resolution]
	    except KeyError:
	        raise gr.Error(
	            f"Unsupported resolution {resolution!r}. "
	            f"Choose one of: {', '.join(res_variants)}."
	        ) from None

	    os.makedirs(output_folder, exist_ok=True)
	    # A random name instead of "number of existing files": counting collides
	    # when requests overlap or when earlier videos have been deleted.
	    video_path = os.path.join(output_folder, f"{uuid.uuid4().hex}.mp4")

	    # Seed the global RNG *before* generation so it can actually influence the
	    # run; seeding afterwards had no effect on the video being produced.
	    # NOTE(review): the pipeline also receives `seed` directly - confirm
	    # whether it relies on the global RNG at all.
	    if seed is not None:
	        torch.manual_seed(seed)

	    # `time_length=12` is passed through unchanged; its unit is defined by the
	    # pipeline.
	    pipe(
	        seed=seed,
	        time_length=12,
	        width=width,
	        height=height,
	        save_path=video_path,
	        text=prompt,
	    )

	    return video_path

	def resize_image(image, output_size=(672, 384)):
	    """Scale ``image`` to cover ``output_size`` and crop it around the centre.

	    The aspect ratio is preserved while resizing: one side is set to the
	    target length and the other is scaled with it (truncated to an integer).
	    The overflow on that other side is then cropped away symmetrically.

	    Args:
	        image: Image object exposing ``width``, ``height``, ``resize`` and
	            ``crop`` (a PIL image in this file).
	        output_size: Target ``(width, height)``.

	    Returns:
	        The resized and cropped image.
	    """
	    target_w = output_size[0]
	    target_h = output_size[1]
	    target_ratio = target_w / target_h
	    source_ratio = image.width / image.height

	    if source_ratio > target_ratio:
	        # Source is relatively wider: fit the height, trim left and right.
	        scaled_h = target_h
	        scaled_w = int(scaled_h * source_ratio)
	        box = (
	            (scaled_w - target_w) / 2,
	            0,
	            (scaled_w + target_w) / 2,
	            target_h,
	        )
	    else:
	        # Source is relatively taller (or equal): fit the width, trim top and
	        # bottom.
	        scaled_w = target_w
	        scaled_h = int(scaled_w / source_ratio)
	        box = (
	            0,
	            (scaled_h - target_h) / 2,
	            target_w,
	            (scaled_h + target_h) / 2,
	        )

	    return image.resize((scaled_w, scaled_h), Image.LANCZOS).crop(box)

	# Gradio UI: a prompt box, an optional "enhance prompt" button, a resolution
	# selector and a generate button that renders into the video player.
	# NOTE(review): widget nesting below is reconstructed - confirm the layout.
	with gr.Blocks() as demo:
	    gr.Markdown('''# Community demo for Kandinsky 4.0''')
	    with gr.Row():
	        with gr.Column():
	            video = gr.Video()
	            prompt = gr.Text(
	                label="Prompt",
	                show_label=False,
	                max_lines=1,
	                placeholder="Enter your prompt",
	                container=False,
	            )
	            # TODO: decide whether this Row is needed or the Markdown can be
	            # placed directly.
	            with gr.Row():
	                # TODO: add a link to GigaChat once the target URL is decided
	                # (the previous link pointed at the unrelated GLM-4 repo).
	                # TODO: replace the text.
	                gr.Markdown(
	                    "✨Upon pressing the enhanced prompt button, we will use the GigaChat model to polish the prompt and overwrite the original one."
	                )
	                enhance_button = gr.Button("✨ Enhance Prompt(Optional)")

	            resolution = gr.Dropdown(
	                label="Video resolution",
	                choices=["1:1", "9:16", "16:9", "1:2", "2:1"],
	                value="16:9"
	            )

	            generate_btn = gr.Button("Generate")

	    # TODO: finish adapting this helper to GigaChat.
	    def beautify_prompt(prompt: str, retry_times: int = 3) -> str:
	        """Return ``prompt`` rewritten by ``giga_generate``.

	        The original prompt is returned unchanged when it is blank, or when
	        no attempt yields a non-empty string.

	        Args:
	            prompt: Prompt text entered by the user.
	            retry_times: Maximum number of attempts (at least one is made).

	        Returns:
	            The enhanced prompt, or ``prompt`` itself as a fallback.
	        """
	        if not isinstance(prompt, str) or not prompt.strip():
	            return prompt

	        for _ in range(max(1, retry_times)):
	            try:
	                enhanced = giga_generate(prompt)
	            except Exception as err:
	                # UI boundary: a failing enhancer must not wipe the user's
	                # prompt, so log the failure and try again.
	                print(f"[beautify_prompt] giga_generate failed: {err!r}")
	                continue
	            if isinstance(enhanced, str) and enhanced.strip():
	                return enhanced

	        gr.Warning("Prompt enhancement is unavailable right now; keeping the original prompt.")
	        return prompt

	    generate_btn.click(fn=sample, inputs=[prompt, resolution], outputs=[video], api_name="video")

	    # TODO
	    def enhance_prompt_func(prompt):
	        """Click handler: enhance the prompt with a single attempt."""
	        return beautify_prompt(prompt, retry_times=1)

	    # TODO
	    enhance_button.click(enhance_prompt_func, inputs=[prompt], outputs=[prompt])

	# Launch the demo only when this file is executed as a script.
	if __name__ == "__main__":
	    #demo.queue(max_size=20, api_open=False)
	    # share=True asks Gradio for a public share link; show_api=False hides the
	    # API documentation from the UI.
	    demo.launch(share=True, show_api=False)