Spaces:

gen6scp
/

sana-zero

Running on Zero

App Files Files Community

sana-zero / configs /sana_base.yaml

gen6scp

Patched codes for ZeroGPU

d643072 3 months ago

raw

history blame contribute delete

3.31 kB

	# data settings
	data:
	data_dir: []
	caption_proportion:
	prompt: 1
	external_caption_suffixes: []
	external_clipscore_suffixes: []
	clip_thr_temperature: 1.0
	clip_thr: 0.0
	sort_dataset: false
	load_text_feat: false
	load_vae_feat: false
	transform: default_train
	type: SanaWebDatasetMS
	image_size: 512
	hq_only: false
	valid_num: 0
	# model settings
	model:
	model: SanaMS_600M_P1_D28
	image_size: 512
	mixed_precision: fp16 # ['fp16', 'fp32', 'bf16']
	fp32_attention: true
	load_from:
	resume_from:
	checkpoint:
	load_ema: false
	resume_lr_scheduler: true
	resume_optimizer: true
	aspect_ratio_type: ASPECT_RATIO_1024
	multi_scale: true
	pe_interpolation: 1.0
	micro_condition: false
	attn_type: linear # 'flash', 'linear', 'vanilla', 'triton_linear'
	cross_norm: false
	autocast_linear_attn: false
	ffn_type: glumbconv
	mlp_acts:
	- silu
	- silu
	-
	mlp_ratio: 2.5
	use_pe: false
	qk_norm: false
	class_dropout_prob: 0.0
	linear_head_dim: 32
	# CFG & PAG settings
	cfg_scale: 4
	guidance_type: classifier-free
	pag_applied_layers: [14]
	# text encoder settings
	text_encoder:
	text_encoder_name: gemma-2-2b-it
	caption_channels: 2304
	y_norm: false
	y_norm_scale_factor: 1.0
	model_max_length: 300
	chi_prompt: []
	# VAE settings
	vae:
	vae_type: dc-ae
	vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
	scale_factor: 0.41407
	vae_latent_dim: 32
	vae_downsample_rate: 32
	sample_posterior: true
	# Scheduler settings
	scheduler:
	train_sampling_steps: 1000
	predict_v: True
	noise_schedule: linear_flow
	pred_sigma: false
	flow_shift: 1.0
	weighting_scheme: logit_normal
	logit_mean: 0.0
	logit_std: 1.0
	vis_sampler: flow_dpm-solver
	# training settings
	train:
	num_workers: 4
	seed: 43
	train_batch_size: 32
	num_epochs: 100
	gradient_accumulation_steps: 1
	grad_checkpointing: false
	gradient_clip: 1.0
	gc_step: 1
	# optimizer settings
	optimizer:
	eps: 1.0e-10
	lr: 0.0001
	type: AdamW
	weight_decay: 0.03
	lr_schedule: constant
	lr_schedule_args:
	num_warmup_steps: 500
	auto_lr:
	rule: sqrt
	ema_rate: 0.9999
	eval_batch_size: 16
	use_fsdp: false
	use_flash_attn: false
	eval_sampling_steps: 250
	lora_rank: 4
	log_interval: 50
	mask_type: 'null'
	mask_loss_coef: 0.0
	load_mask_index: false
	snr_loss: false
	real_prompt_ratio: 1.0
	debug_nan: false
	# checkpoint settings
	save_image_epochs: 1
	save_model_epochs: 1
	save_model_steps: 1000000
	# visualization settings
	visualize: false
	null_embed_root: output/pretrained_models/
	valid_prompt_embed_root: output/tmp_embed/
	validation_prompts:
	- dog
	- portrait photo of a girl, photograph, highly detailed face, depth of field
	- Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
	- Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
	- A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece
	local_save_vis: false
	deterministic_validation: true
	online_metric: false
	eval_metric_step: 5000
	online_metric_dir: metric_helper
	# work dir settings
	work_dir: /cache/exps/
	skip_step: 0
	# LCM settings
	loss_type: huber
	huber_c: 0.001
	num_ddim_timesteps: 50
	w_max: 15.0
	w_min: 3.0
	ema_decay: 0.95