franciszzj
committed on
Commit
•
bafa7b2
1
Parent(s):
b213d84
rm conf
Browse files
- leffa/conf/TARGETS +0 -17
- leffa/conf/constants/base.yaml +0 -31
- leffa/conf/datasets/deepfashion_test.yaml +0 -47
- leffa/conf/datasets/deepfashion_train.yaml +0 -47
- leffa/conf/datasets/dress_code_test.yaml +0 -56
- leffa/conf/datasets/dress_code_train.yaml +0 -55
- leffa/conf/datasets/viton_hd_test.yaml +0 -54
- leffa/conf/datasets/viton_hd_test_local.yaml +0 -20
- leffa/conf/datasets/viton_hd_train.yaml +0 -55
- leffa/conf/eval/base.yaml +0 -4
- leffa/conf/model/cat_vton_flux.yaml +0 -19
- leffa/conf/model/cat_vton_sd15.yaml +0 -20
- leffa/conf/model/cat_vton_sdxl.yaml +0 -11
- leffa/conf/model/idm_vton_sd15.yaml +0 -16
- leffa/conf/model/idm_vton_sdxl.yaml +0 -17
- leffa/conf/model/simple_vton_sd15.yaml +0 -23
- leffa/conf/model/simple_vton_sdxl.yaml +0 -22
- leffa/conf/predict.yaml +0 -66
- leffa/conf/test.yaml +0 -89
- leffa/conf/train.yaml +0 -84
- leffa/conf/train_local.yaml +0 -9
- leffa/conf/train_mae.yaml +0 -159
- leffa/conf/trainer/base.yaml +0 -14
leffa/conf/TARGETS
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
load("//gen_ai/genie/components:macros.bzl", "genie_hydra_config_bundle")
|
2 |
-
|
3 |
-
oncall("genads_infra")
|
4 |
-
|
5 |
-
# All configs including dataloading, torchtnt, and profiling.
|
6 |
-
# Note you need all dependencies for hydra instantiation here.
|
7 |
-
genie_hydra_config_bundle(
|
8 |
-
name = "idm_vton_hydra_configs",
|
9 |
-
srcs = glob(["**/*.yaml"]),
|
10 |
-
deps = [
|
11 |
-
"//caffe2:torch",
|
12 |
-
"//genads/common/data:transforms",
|
13 |
-
"//genads/idm_vton:idm_vton_lib",
|
14 |
-
"//media_dataloader/api:api",
|
15 |
-
"//torchmultimodal/fb/genai/transforms:transforms",
|
16 |
-
],
|
17 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/constants/base.yaml
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
# for virtual try-on
|
2 |
-
# height: 512
|
3 |
-
# width: 384
|
4 |
-
# batch_size: 8
|
5 |
-
height: 1024
|
6 |
-
width: 768
|
7 |
-
batch_size: 2
|
8 |
-
|
9 |
-
# for pose transfer
|
10 |
-
# height: 256
|
11 |
-
# width: 176
|
12 |
-
# batch_size: 8
|
13 |
-
# height: 512
|
14 |
-
# width: 352
|
15 |
-
# batch_size: 4
|
16 |
-
# height: 1024
|
17 |
-
# width: 704
|
18 |
-
# batch_size: 1
|
19 |
-
|
20 |
-
precision: bf16
|
21 |
-
|
22 |
-
max_steps: null
|
23 |
-
max_epochs: 200
|
24 |
-
max_train_steps_per_epoch: null
|
25 |
-
|
26 |
-
evaluate_every_n_train_steps: null
|
27 |
-
evaluate_every_n_train_epochs: null
|
28 |
-
max_eval_steps_per_eval_epoch: null
|
29 |
-
|
30 |
-
use_torchsnapshot: false
|
31 |
-
checkpoint_every_n_steps: 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/deepfashion_test.yaml
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
deepfashion_test:
|
2 |
-
dataset:
|
3 |
-
_target_: media_dataloader.api.EnrichingDataset
|
4 |
-
datasource:
|
5 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
6 |
-
namespace: mgenai
|
7 |
-
table: deepfashion_pose_transfer
|
8 |
-
partition_filter_predicate_list: ["ds = '2024-08-15' AND set_name = 'val'"]
|
9 |
-
enrichments:
|
10 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
11 |
-
lookup_handle_to_media_columns:
|
12 |
-
to_img_manifold_path: "image"
|
13 |
-
from_img_manifold_path: "cloth"
|
14 |
-
to_img_iuv_manifold_path: "image_densepose"
|
15 |
-
from_img_iuv_manifold_path: "cloth_densepose"
|
16 |
-
collate_fn:
|
17 |
-
- _target_: media_dataloader.api.Collate
|
18 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
19 |
-
image_field: image
|
20 |
-
blob_field: image
|
21 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
22 |
-
image_field: cloth
|
23 |
-
blob_field: cloth
|
24 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
25 |
-
image_field: image_densepose
|
26 |
-
blob_field: image_densepose
|
27 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
28 |
-
image_field: cloth_densepose
|
29 |
-
blob_field: cloth_densepose
|
30 |
-
- _target_: leffa.datasets.transform.VtonTransform
|
31 |
-
height: ${constants.height}
|
32 |
-
width: ${constants.width}
|
33 |
-
is_train: false
|
34 |
-
dataset: deepfashion
|
35 |
-
aug_garment_ratio: 0.0
|
36 |
-
get_garment_from_person_ratio: 0.0
|
37 |
-
aug_mask_ratio: 0.0
|
38 |
-
|
39 |
-
dataloader:
|
40 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
41 |
-
dataset: ${datasets.deepfashion_test.dataset}
|
42 |
-
batch_size: ${constants.batch_size}
|
43 |
-
num_workers: 0
|
44 |
-
prefetch_factor: null
|
45 |
-
pin_memory: true
|
46 |
-
persistent_workers: false
|
47 |
-
multiprocessing_context: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/deepfashion_train.yaml
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
deepfashion_train:
|
2 |
-
dataset:
|
3 |
-
_target_: media_dataloader.api.EnrichingDataset
|
4 |
-
datasource:
|
5 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
6 |
-
namespace: mgenai
|
7 |
-
table: deepfashion_pose_transfer
|
8 |
-
partition_filter_predicate_list: ["ds = '2024-08-15' AND set_name = 'train'"]
|
9 |
-
enrichments:
|
10 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
11 |
-
lookup_handle_to_media_columns:
|
12 |
-
to_img_manifold_path: "image"
|
13 |
-
from_img_manifold_path: "cloth"
|
14 |
-
to_img_iuv_manifold_path: "image_densepose"
|
15 |
-
from_img_iuv_manifold_path: "cloth_densepose"
|
16 |
-
collate_fn:
|
17 |
-
- _target_: media_dataloader.api.Collate
|
18 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
19 |
-
image_field: image
|
20 |
-
blob_field: image
|
21 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
22 |
-
image_field: cloth
|
23 |
-
blob_field: cloth
|
24 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
25 |
-
image_field: image_densepose
|
26 |
-
blob_field: image_densepose
|
27 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
28 |
-
image_field: cloth_densepose
|
29 |
-
blob_field: cloth_densepose
|
30 |
-
- _target_: leffa.datasets.transform.VtonTransform
|
31 |
-
height: ${constants.height}
|
32 |
-
width: ${constants.width}
|
33 |
-
is_train: true
|
34 |
-
dataset: deepfashion
|
35 |
-
aug_garment_ratio: 0.0
|
36 |
-
get_garment_from_person_ratio: 0.0
|
37 |
-
aug_mask_ratio: 0.0
|
38 |
-
|
39 |
-
dataloader:
|
40 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
41 |
-
dataset: ${datasets.deepfashion_train.dataset}
|
42 |
-
batch_size: ${constants.batch_size}
|
43 |
-
num_workers: 4
|
44 |
-
prefetch_factor: 2
|
45 |
-
pin_memory: true
|
46 |
-
persistent_workers: true
|
47 |
-
multiprocessing_context: forkserver
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/dress_code_test.yaml
DELETED
@@ -1,56 +0,0 @@
|
|
1 |
-
dress_code_test:
|
2 |
-
dataset:
|
3 |
-
_target_: media_dataloader.api.EnrichingDataset
|
4 |
-
datasource:
|
5 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
6 |
-
namespace: ad_metrics
|
7 |
-
table: vton_public_dataset_dress_code_test_paired_v2
|
8 |
-
# table: vton_public_dataset_dress_code_test_unpaired_v2
|
9 |
-
# table: vton_public_dataset_dress_code_test_upper_body_paired_v2
|
10 |
-
# table: vton_public_dataset_dress_code_test_upper_body_unpaired_v2
|
11 |
-
partition_filter_predicate_list: ["ds = '2024-09-14'"]
|
12 |
-
# table: vton_public_dataset_dress_code_test_lower_body_paired_v2
|
13 |
-
# table: vton_public_dataset_dress_code_test_lower_body_unpaired_v2
|
14 |
-
# table: vton_public_dataset_dress_code_test_dresses_paired_v2
|
15 |
-
# table: vton_public_dataset_dress_code_test_dresses_unpaired_v2
|
16 |
-
# partition_filter_predicate_list: ["ds = '2024-09-16'"]
|
17 |
-
enrichments:
|
18 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
19 |
-
lookup_handle_to_media_columns:
|
20 |
-
image_manifold_path: "image"
|
21 |
-
cloth_manifold_path: "cloth"
|
22 |
-
agnostic_mask_manifold_path: "agnostic_mask"
|
23 |
-
dense_manifold_path: "image_densepose"
|
24 |
-
label_map_manifold_path: "image_parse"
|
25 |
-
collate_fn:
|
26 |
-
- _target_: media_dataloader.api.Collate
|
27 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
28 |
-
image_field: image
|
29 |
-
blob_field: image
|
30 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
31 |
-
image_field: cloth
|
32 |
-
blob_field: cloth
|
33 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
34 |
-
image_field: agnostic_mask
|
35 |
-
blob_field: agnostic_mask
|
36 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
37 |
-
image_field: image_densepose
|
38 |
-
blob_field: image_densepose
|
39 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
40 |
-
image_field: image_parse
|
41 |
-
blob_field: image_parse
|
42 |
-
- _target_: leffa.datasets.transform.VtonTransform
|
43 |
-
height: ${constants.height}
|
44 |
-
width: ${constants.width}
|
45 |
-
is_train: false
|
46 |
-
dataset: dress_code
|
47 |
-
|
48 |
-
dataloader:
|
49 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
50 |
-
dataset: ${datasets.dress_code_test.dataset}
|
51 |
-
batch_size: ${constants.batch_size}
|
52 |
-
num_workers: 0
|
53 |
-
prefetch_factor: null
|
54 |
-
pin_memory: true
|
55 |
-
persistent_workers: false
|
56 |
-
multiprocessing_context: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/dress_code_train.yaml
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
dress_code_train:
|
2 |
-
dataset:
|
3 |
-
_target_: media_dataloader.api.EnrichingDataset
|
4 |
-
datasource:
|
5 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
6 |
-
namespace: ad_metrics
|
7 |
-
table: vton_public_dataset_dress_code_train_v2
|
8 |
-
# table: vton_public_dataset_dress_code_train_upper_body_v2
|
9 |
-
partition_filter_predicate_list: ["ds = '2024-09-14'"]
|
10 |
-
# table: vton_public_dataset_dress_code_train_lower_body_v2
|
11 |
-
# table: vton_public_dataset_dress_code_train_dresses_v2
|
12 |
-
# partition_filter_predicate_list: ["ds = '2024-09-15'"]
|
13 |
-
enrichments:
|
14 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
15 |
-
lookup_handle_to_media_columns:
|
16 |
-
image_manifold_path: "image"
|
17 |
-
cloth_manifold_path: "cloth"
|
18 |
-
agnostic_mask_manifold_path: "agnostic_mask"
|
19 |
-
dense_manifold_path: "image_densepose"
|
20 |
-
label_map_manifold_path: "image_parse"
|
21 |
-
collate_fn:
|
22 |
-
- _target_: media_dataloader.api.Collate
|
23 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
24 |
-
image_field: image
|
25 |
-
blob_field: image
|
26 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
27 |
-
image_field: cloth
|
28 |
-
blob_field: cloth
|
29 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
30 |
-
image_field: agnostic_mask
|
31 |
-
blob_field: agnostic_mask
|
32 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
33 |
-
image_field: image_densepose
|
34 |
-
blob_field: image_densepose
|
35 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
36 |
-
image_field: image_parse
|
37 |
-
blob_field: image_parse
|
38 |
-
- _target_: leffa.datasets.transform.VtonTransform
|
39 |
-
height: ${constants.height}
|
40 |
-
width: ${constants.width}
|
41 |
-
is_train: true
|
42 |
-
dataset: dress_code
|
43 |
-
aug_garment_ratio: 0.0
|
44 |
-
get_garment_from_person_ratio: 0.0
|
45 |
-
aug_mask_ratio: 0.0
|
46 |
-
|
47 |
-
dataloader:
|
48 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
49 |
-
dataset: ${datasets.dress_code_train.dataset}
|
50 |
-
batch_size: ${constants.batch_size}
|
51 |
-
num_workers: 4
|
52 |
-
prefetch_factor: 2
|
53 |
-
pin_memory: true
|
54 |
-
persistent_workers: true
|
55 |
-
multiprocessing_context: forkserver
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/viton_hd_test.yaml
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
viton_hd_test:
|
2 |
-
dataset:
|
3 |
-
_target_: media_dataloader.api.EnrichingDataset
|
4 |
-
datasource:
|
5 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
6 |
-
namespace: ad_metrics
|
7 |
-
table: vton_public_dataset_viton_hd_test_paired_v2
|
8 |
-
partition_filter_predicate_list: ["ds = '2024-10-30'"]
|
9 |
-
# table: vton_public_dataset_viton_hd_test_unpaired_v1
|
10 |
-
# partition_filter_predicate_list: ["ds = '2024-09-12'"]
|
11 |
-
enrichments:
|
12 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
13 |
-
lookup_handle_to_media_columns:
|
14 |
-
image_manifold_path: "image"
|
15 |
-
cloth_manifold_path: "cloth"
|
16 |
-
agnostic_mask_manifold_path: "agnostic_mask"
|
17 |
-
image_densepose_manifold_path: "image_densepose"
|
18 |
-
cloth_mask_manifold_path: "cloth_mask"
|
19 |
-
image_parse_v3_manifold_path: "image_parse"
|
20 |
-
collate_fn:
|
21 |
-
- _target_: media_dataloader.api.Collate
|
22 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
23 |
-
image_field: image
|
24 |
-
blob_field: image
|
25 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
26 |
-
image_field: cloth
|
27 |
-
blob_field: cloth
|
28 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
29 |
-
image_field: agnostic_mask
|
30 |
-
blob_field: agnostic_mask
|
31 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
32 |
-
image_field: image_densepose
|
33 |
-
blob_field: image_densepose
|
34 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
35 |
-
image_field: cloth_mask
|
36 |
-
blob_field: cloth_mask
|
37 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
38 |
-
image_field: image_parse
|
39 |
-
blob_field: image_parse
|
40 |
-
- _target_: leffa.datasets.transform.VtonTransform
|
41 |
-
height: ${constants.height}
|
42 |
-
width: ${constants.width}
|
43 |
-
is_train: false
|
44 |
-
dataset: viton_hd
|
45 |
-
|
46 |
-
dataloader:
|
47 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
48 |
-
dataset: ${datasets.viton_hd_test.dataset}
|
49 |
-
batch_size: ${constants.batch_size}
|
50 |
-
num_workers: 0
|
51 |
-
prefetch_factor: null
|
52 |
-
pin_memory: true
|
53 |
-
persistent_workers: false
|
54 |
-
multiprocessing_context: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/viton_hd_test_local.yaml
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
viton_hd_test_local:
|
2 |
-
dataset:
|
3 |
-
_target_: leffa.datasets.viton_hd.VitonHDLocalDataset
|
4 |
-
# _target_: leffa.datasets.random_dataset.RandomDataset
|
5 |
-
dataroot_path: /home/zijianzhou/data/viton_hd
|
6 |
-
phase: test
|
7 |
-
order: unpaired
|
8 |
-
size:
|
9 |
-
- ${constants.height}
|
10 |
-
- ${constants.width}
|
11 |
-
|
12 |
-
dataloader:
|
13 |
-
_target_: torch.utils.data.DataLoader
|
14 |
-
dataset: ${datasets.viton_hd_test_local.dataset}
|
15 |
-
batch_size: ${constants.batch_size}
|
16 |
-
num_workers: 0
|
17 |
-
prefetch_factor: null
|
18 |
-
pin_memory: true
|
19 |
-
persistent_workers: false
|
20 |
-
multiprocessing_context: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/datasets/viton_hd_train.yaml
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
viton_hd_train:
|
2 |
-
dataset:
|
3 |
-
_target_: media_dataloader.api.EnrichingDataset
|
4 |
-
datasource:
|
5 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
6 |
-
namespace: ad_metrics
|
7 |
-
table: vton_public_dataset_viton_hd_train_v2
|
8 |
-
partition_filter_predicate_list: ["ds = '2024-10-30'"]
|
9 |
-
enrichments:
|
10 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
11 |
-
lookup_handle_to_media_columns:
|
12 |
-
image_manifold_path: "image"
|
13 |
-
cloth_manifold_path: "cloth"
|
14 |
-
agnostic_mask_manifold_path: "agnostic_mask"
|
15 |
-
image_densepose_manifold_path: "image_densepose"
|
16 |
-
cloth_mask_manifold_path: "cloth_mask"
|
17 |
-
image_parse_v3_manifold_path: "image_parse"
|
18 |
-
collate_fn:
|
19 |
-
- _target_: media_dataloader.api.Collate
|
20 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
21 |
-
image_field: image
|
22 |
-
blob_field: image
|
23 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
24 |
-
image_field: cloth
|
25 |
-
blob_field: cloth
|
26 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
27 |
-
image_field: agnostic_mask
|
28 |
-
blob_field: agnostic_mask
|
29 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
30 |
-
image_field: image_densepose
|
31 |
-
blob_field: image_densepose
|
32 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
33 |
-
image_field: cloth_mask
|
34 |
-
blob_field: cloth_mask
|
35 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
36 |
-
image_field: image_parse
|
37 |
-
blob_field: image_parse
|
38 |
-
- _target_: leffa.datasets.transform.VtonTransform
|
39 |
-
height: ${constants.height}
|
40 |
-
width: ${constants.width}
|
41 |
-
is_train: true
|
42 |
-
dataset: viton_hd
|
43 |
-
aug_garment_ratio: 0.0
|
44 |
-
get_garment_from_person_ratio: 0.0
|
45 |
-
aug_mask_ratio: 0.0
|
46 |
-
|
47 |
-
dataloader:
|
48 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
49 |
-
dataset: ${datasets.viton_hd_train.dataset}
|
50 |
-
batch_size: ${constants.batch_size}
|
51 |
-
num_workers: 4
|
52 |
-
prefetch_factor: 2
|
53 |
-
pin_memory: true
|
54 |
-
persistent_workers: true
|
55 |
-
multiprocessing_context: forkserver
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/eval/base.yaml
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
warmup_iters: 0
|
2 |
-
evaluate_every_n_train_steps: ${constants.evaluate_every_n_train_steps}
|
3 |
-
evaluate_every_n_train_epochs: ${constants.evaluate_every_n_train_epochs}
|
4 |
-
max_eval_steps_per_eval_epoch: ${constants.max_eval_steps_per_eval_epoch}
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/cat_vton_flux.yaml
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
_target_: leffa.models.cat_vton_model.CatVtonModel
|
2 |
-
# FLUX.1-dev
|
3 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/FLUX.1-dev
|
4 |
-
new_in_channels: 33 # 16+1+16
|
5 |
-
height: ${constants.height}
|
6 |
-
width: ${constants.width}
|
7 |
-
garment_dropout_ratio: 0.1
|
8 |
-
use_dream: false
|
9 |
-
dream_detail_preservation: 10.0
|
10 |
-
use_garment_mask: false
|
11 |
-
only_optimize_unet_attn1: true
|
12 |
-
use_learning_flow_in_attention: false
|
13 |
-
learning_flow_in_attention_lambda: 0.001
|
14 |
-
learning_flow_in_attention_stop_timestep: 500
|
15 |
-
use_attention_flow_loss: false
|
16 |
-
attention_flow_loss_lambda: 0.001
|
17 |
-
use_pixel_space_supervision: false
|
18 |
-
pixel_space_supervision_lambda: 10.0
|
19 |
-
use_densepose: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/cat_vton_sd15.yaml
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
_target_: leffa.models.cat_vton_model.CatVtonModel
|
2 |
-
# SD1.5
|
3 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
|
4 |
-
pretrained_vae_name_or_path: manifold://genads_models/tree/zijianzhou/model/sd-vae-ft-mse
|
5 |
-
new_in_channels: 9
|
6 |
-
height: ${constants.height}
|
7 |
-
width: ${constants.width}
|
8 |
-
garment_dropout_ratio: 0.1
|
9 |
-
use_dream: true
|
10 |
-
dream_detail_preservation: 10.0
|
11 |
-
use_garment_mask: false
|
12 |
-
only_optimize_unet_attn1: true
|
13 |
-
use_learning_flow_in_attention: false
|
14 |
-
learning_flow_in_attention_lambda: 0.001
|
15 |
-
learning_flow_in_attention_stop_timestep: 500
|
16 |
-
use_attention_flow_loss: false
|
17 |
-
attention_flow_loss_lambda: 0.001
|
18 |
-
use_pixel_space_supervision: true
|
19 |
-
pixel_space_supervision_lambda: 10.0
|
20 |
-
use_densepose: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/cat_vton_sdxl.yaml
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
_target_: leffa.models.cat_vton_model.CatVtonModel
|
2 |
-
# SDXL
|
3 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-1.0-inpainting-0.1
|
4 |
-
new_in_channels: 9
|
5 |
-
height: ${constants.height}
|
6 |
-
width: ${constants.width}
|
7 |
-
garment_dropout_ratio: 0.1
|
8 |
-
use_dream: true
|
9 |
-
dream_detail_preservation: 10.0
|
10 |
-
use_garment_mask: false
|
11 |
-
only_optimize_unet_attn1: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/idm_vton_sd15.yaml
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
_target_: leffa.models.idm_vton_model.IdmVtonModel
|
2 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
|
3 |
-
pretrained_vae_name_or_path: manifold://genads_models/tree/zijianzhou/model/sd-vae-ft-mse
|
4 |
-
pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
|
5 |
-
new_in_channels: 13
|
6 |
-
height: ${constants.height}
|
7 |
-
width: ${constants.width}
|
8 |
-
garment_dropout_ratio: 0.1
|
9 |
-
use_dream: false
|
10 |
-
dream_detail_preservation: 10.0
|
11 |
-
skip_cross_attention: true
|
12 |
-
skip_cross_attention_garmentnet: true
|
13 |
-
copy_unet_to_unet_encoder: false
|
14 |
-
only_optimize_unet_attn1: false
|
15 |
-
optimize_unet: true
|
16 |
-
optimize_unet_encoder: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/idm_vton_sdxl.yaml
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
_target_: leffa.models.idm_vton_model.IdmVtonModel
|
2 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-1.0-inpainting-0.1
|
3 |
-
pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-base-1.0
|
4 |
-
pretrained_image_encoder_path: manifold://genads_models/tree/zijianzhou/model/IP-Adapter/models/image_encoder
|
5 |
-
pretrained_ip_adapter_path: manifold://genads_models/tree/zijianzhou/model/IP-Adapter/sdxl_models/ip-adapter-plus_sdxl_vit-h.bin
|
6 |
-
new_in_channels: 13
|
7 |
-
height: ${constants.height}
|
8 |
-
width: ${constants.width}
|
9 |
-
garment_dropout_ratio: 0.1
|
10 |
-
use_dream: false
|
11 |
-
dream_detail_preservation: 10.0
|
12 |
-
skip_cross_attention: false
|
13 |
-
skip_cross_attention_garmentnet: false
|
14 |
-
copy_unet_to_unet_encoder: false
|
15 |
-
only_optimize_unet_attn1: false
|
16 |
-
optimize_unet: true
|
17 |
-
optimize_unet_encoder: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/simple_vton_sd15.yaml
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
_target_: leffa.models.simple_vton_model.SimpleVtonModel
|
2 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
|
3 |
-
pretrained_vae_name_or_path: manifold://genads_models/tree/zijianzhou/model/sd-vae-ft-mse
|
4 |
-
pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
|
5 |
-
new_in_channels: 12
|
6 |
-
height: ${constants.height}
|
7 |
-
width: ${constants.width}
|
8 |
-
garment_dropout_ratio: 0.1
|
9 |
-
use_dream: false
|
10 |
-
dream_detail_preservation: 10.0
|
11 |
-
skip_cross_attention: true
|
12 |
-
skip_cross_attention_garmentnet: true
|
13 |
-
copy_unet_to_unet_encoder: false
|
14 |
-
only_optimize_unet_attn1: false
|
15 |
-
optimize_unet: true
|
16 |
-
optimize_unet_encoder: true
|
17 |
-
use_learning_flow_in_attention: false
|
18 |
-
learning_flow_in_attention_lambda: 0.001
|
19 |
-
learning_flow_in_attention_stop_timestep: 500
|
20 |
-
use_attention_flow_loss: false
|
21 |
-
attention_flow_loss_lambda: 0.001
|
22 |
-
use_pixel_space_supervision: false
|
23 |
-
pixel_space_supervision_lambda: 10.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/model/simple_vton_sdxl.yaml
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
_target_: leffa.models.simple_vton_model.SimpleVtonModel
|
2 |
-
pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-1.0-inpainting-0.1
|
3 |
-
pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-base-1.0
|
4 |
-
new_in_channels: 12
|
5 |
-
height: ${constants.height}
|
6 |
-
width: ${constants.width}
|
7 |
-
garment_dropout_ratio: 0.1
|
8 |
-
use_dream: false
|
9 |
-
dream_detail_preservation: 10.0
|
10 |
-
skip_cross_attention: true
|
11 |
-
skip_cross_attention_garmentnet: true
|
12 |
-
copy_unet_to_unet_encoder: false
|
13 |
-
only_optimize_unet_attn1: false
|
14 |
-
optimize_unet: true
|
15 |
-
optimize_unet_encoder: true
|
16 |
-
use_learning_flow_in_attention: false
|
17 |
-
learning_flow_in_attention_lambda: 0.001
|
18 |
-
learning_flow_in_attention_stop_timestep: 500
|
19 |
-
use_attention_flow_loss: false
|
20 |
-
attention_flow_loss_lambda: 0.001
|
21 |
-
use_pixel_space_supervision: false
|
22 |
-
pixel_space_supervision_lambda: 10.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/predict.yaml
DELETED
@@ -1,66 +0,0 @@
|
|
1 |
-
# File used for running pure prediction with torchtnt, including a custom score computation.
|
2 |
-
|
3 |
-
seed: 42
|
4 |
-
|
5 |
-
defaults:
|
6 |
-
- constants: base
|
7 |
-
# - model: idm_vton_sd15
|
8 |
-
# - model: idm_vton_sdxl
|
9 |
-
# - model: cat_vton_sd15
|
10 |
-
# - model: cat_vton_sdxl
|
11 |
-
- model: simple_vton_sd15
|
12 |
-
# - model: simple_vton_sdxl
|
13 |
-
# - model: simple_vton_sd15_fluxvae
|
14 |
-
# - model: simple_vton_sdxl_fluxvae
|
15 |
-
- datasets:
|
16 |
-
- viton_hd_test
|
17 |
-
- dress_code_test
|
18 |
-
- deepfashion_test
|
19 |
-
- _self_
|
20 |
-
|
21 |
-
constants:
|
22 |
-
# for virtual try-on
|
23 |
-
# height: 512
|
24 |
-
# width: 384
|
25 |
-
height: 1024
|
26 |
-
width: 768
|
27 |
-
# for pose transfer
|
28 |
-
# height: 256
|
29 |
-
# width: 176
|
30 |
-
# height: 512
|
31 |
-
# width: 352
|
32 |
-
# height: 1024
|
33 |
-
# width: 704
|
34 |
-
batch_size: 1
|
35 |
-
|
36 |
-
# null to go through whole dataloader.
|
37 |
-
max_steps_per_epoch: null
|
38 |
-
dataloader: ${datasets.viton_hd_test.dataloader}
|
39 |
-
# dataloader: ${datasets.dress_code_test.dataloader}
|
40 |
-
# dataloader: ${datasets.deepfashion_test.dataloader}
|
41 |
-
manifold_log_dir: manifold://genads_models/tree/zijianzhou/output/simple_vton/tmp
|
42 |
-
|
43 |
-
inference:
|
44 |
-
# _target_: leffa.inference.IdmVtonInference
|
45 |
-
# _target_: leffa.inference.CatVtonInference
|
46 |
-
_target_: leffa.inference.SimpleVtonInference
|
47 |
-
_partial_: True
|
48 |
-
model: ${model}
|
49 |
-
model_entity_id: null
|
50 |
-
checkpoint_version: null
|
51 |
-
|
52 |
-
callbacks:
|
53 |
-
- _target_: leffa.callbacks.save_image_callback.SaveImageCallback
|
54 |
-
manifold_path: ${manifold_log_dir}/generated_paired
|
55 |
-
# manifold_path: ${manifold_log_dir}/generated_unpaired
|
56 |
-
task_type: vton
|
57 |
-
# manifold_path: ${manifold_log_dir}
|
58 |
-
# task_type: pose_transfer
|
59 |
-
|
60 |
-
unit:
|
61 |
-
_target_: leffa.vton_pred_unit.VtonPredUnit
|
62 |
-
inference_fn: ${inference}
|
63 |
-
|
64 |
-
hydra:
|
65 |
-
run:
|
66 |
-
dir: /tmp/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/test.yaml
DELETED
@@ -1,89 +0,0 @@
|
|
1 |
-
### Main entry for the training config in hydra.
|
2 |
-
### Only top level configurations can run, we decompose the full
|
3 |
-
### config to multiple subfolders for better reusability.
|
4 |
-
|
5 |
-
seed: 42
|
6 |
-
|
7 |
-
defaults:
|
8 |
-
- constants: base
|
9 |
-
- model: simple_vton_sd15
|
10 |
-
- trainer: base
|
11 |
-
- eval: base
|
12 |
-
- datasets:
|
13 |
-
- viton_hd_train
|
14 |
-
- viton_hd_test
|
15 |
-
- viton_hd_test_local
|
16 |
-
- dress_code_train
|
17 |
-
- dress_code_test
|
18 |
-
- deepfashion_train
|
19 |
-
- deepfashion_test
|
20 |
-
- _self_
|
21 |
-
|
22 |
-
constants:
|
23 |
-
height: 1024
|
24 |
-
width: 768
|
25 |
-
batch_size: 1
|
26 |
-
|
27 |
-
train_dataset: ${datasets.viton_hd_train}
|
28 |
-
# train_dataset: ${datasets.dress_code_train}
|
29 |
-
# train_dataset: ${datasets.deepfashion_train}
|
30 |
-
eval_dataset: ${datasets.viton_hd_test}
|
31 |
-
|
32 |
-
unit:
|
33 |
-
_target_: leffa.vton_unit.VtonUnit
|
34 |
-
_partial_: True
|
35 |
-
model: ${model}
|
36 |
-
# strategy: ddp
|
37 |
-
strategy:
|
38 |
-
_target_: leffa.utils.create_fsdp_strategy
|
39 |
-
sharding_strategy: SHARD_GRAD_OP
|
40 |
-
state_dict_type: SHARDED_STATE_DICT
|
41 |
-
mixed_precision:
|
42 |
-
param_dtype: ${constants.precision}
|
43 |
-
reduce_dtype: ${constants.precision}
|
44 |
-
cast_forward_inputs: True
|
45 |
-
class_paths:
|
46 |
-
# For VAE (first stage)
|
47 |
-
- diffusers.models.unets.unet_2d_blocks.DownEncoderBlock2D
|
48 |
-
- diffusers.models.unets.unet_2d_blocks.UNetMidBlock2D
|
49 |
-
- diffusers.models.unets.unet_2d_blocks.UpDecoderBlock2D
|
50 |
-
# For UNet (unet stage) IdmVton
|
51 |
-
- leffa.models.diffusion_model.attentionhacked_tryon.BasicTransformerBlock
|
52 |
-
- leffa.models.diffusion_model.attentionhacked_garment.BasicTransformerBlock
|
53 |
-
# For UNet (unet stage) CatVton
|
54 |
-
- diffusers.models.attention.BasicTransformerBlock
|
55 |
-
# For CLIP (condition stage)
|
56 |
-
- transformers.CLIPTextModel
|
57 |
-
- transformers.CLIPTextModelWithProjection
|
58 |
-
- transformers.CLIPVisionModelWithProjection
|
59 |
-
optim_fn:
|
60 |
-
_target_: torch.optim.AdamW
|
61 |
-
_partial_: True
|
62 |
-
lr: 1.0e-5
|
63 |
-
betas: [0.9, 0.999]
|
64 |
-
eps: 1.0e-8
|
65 |
-
weight_decay: 1.0e-2
|
66 |
-
amsgrad: false
|
67 |
-
lr_scheduler_fn:
|
68 |
-
_target_: torch.optim.lr_scheduler.ConstantLR
|
69 |
-
_partial_: True
|
70 |
-
factor: 1.0
|
71 |
-
swa_params:
|
72 |
-
_target_: torchtnt.framework.auto_unit.SWAParams
|
73 |
-
warmup_steps_or_epochs: 0
|
74 |
-
step_or_epoch_update_freq: 1
|
75 |
-
averaging_method: ema
|
76 |
-
ema_decay: 0.9999
|
77 |
-
use_lit: True
|
78 |
-
precision: ${constants.precision}
|
79 |
-
clip_grad_norm: 1.0
|
80 |
-
|
81 |
-
umm_metadata:
|
82 |
-
model_type_name: ads_genads_ldm
|
83 |
-
model_series_name: ads_genads_ldm
|
84 |
-
oncall: ai_genads
|
85 |
-
|
86 |
-
checkpoint:
|
87 |
-
checkpoint_dir: null
|
88 |
-
checkpoint_path: null
|
89 |
-
checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/train.yaml
DELETED
@@ -1,84 +0,0 @@
|
|
1 |
-
### Main entry for the training config in hydra.
|
2 |
-
### Only top level configurations can run, we decompose the full
|
3 |
-
### config to multiple subfolders for better reusability.
|
4 |
-
|
5 |
-
seed: 42
|
6 |
-
|
7 |
-
defaults:
|
8 |
-
- constants: base
|
9 |
-
- model: simple_vton_sd15
|
10 |
-
- trainer: base
|
11 |
-
- eval: base
|
12 |
-
- datasets:
|
13 |
-
- viton_hd_train
|
14 |
-
- viton_hd_test
|
15 |
-
- viton_hd_test_local
|
16 |
-
- dress_code_train
|
17 |
-
- dress_code_test
|
18 |
-
- deepfashion_train
|
19 |
-
- deepfashion_test
|
20 |
-
- _self_
|
21 |
-
|
22 |
-
train_dataset: ${datasets.viton_hd_train}
|
23 |
-
# train_dataset: ${datasets.dress_code_train}
|
24 |
-
# train_dataset: ${datasets.deepfashion_train}
|
25 |
-
eval_dataset: null
|
26 |
-
|
27 |
-
unit:
|
28 |
-
_target_: leffa.vton_unit.VtonUnit
|
29 |
-
_partial_: True
|
30 |
-
model: ${model}
|
31 |
-
# strategy: ddp
|
32 |
-
strategy:
|
33 |
-
_target_: leffa.utils.create_fsdp_strategy
|
34 |
-
sharding_strategy: SHARD_GRAD_OP
|
35 |
-
state_dict_type: SHARDED_STATE_DICT
|
36 |
-
mixed_precision:
|
37 |
-
param_dtype: ${constants.precision}
|
38 |
-
reduce_dtype: ${constants.precision}
|
39 |
-
cast_forward_inputs: True
|
40 |
-
class_paths:
|
41 |
-
# For VAE (first stage)
|
42 |
-
- diffusers.models.unets.unet_2d_blocks.DownEncoderBlock2D
|
43 |
-
- diffusers.models.unets.unet_2d_blocks.UNetMidBlock2D
|
44 |
-
- diffusers.models.unets.unet_2d_blocks.UpDecoderBlock2D
|
45 |
-
# For UNet (unet stage) IdmVton
|
46 |
-
- leffa.models.diffusion_model.attentionhacked_tryon.BasicTransformerBlock
|
47 |
-
- leffa.models.diffusion_model.attentionhacked_garment.BasicTransformerBlock
|
48 |
-
# For UNet (unet stage) CatVton
|
49 |
-
- diffusers.models.attention.BasicTransformerBlock
|
50 |
-
# For CLIP (condition stage)
|
51 |
-
- transformers.CLIPTextModel
|
52 |
-
- transformers.CLIPTextModelWithProjection
|
53 |
-
- transformers.CLIPVisionModelWithProjection
|
54 |
-
optim_fn:
|
55 |
-
_target_: torch.optim.AdamW
|
56 |
-
_partial_: True
|
57 |
-
lr: 1.0e-5
|
58 |
-
betas: [0.9, 0.999]
|
59 |
-
eps: 1.0e-8
|
60 |
-
weight_decay: 1.0e-2
|
61 |
-
amsgrad: false
|
62 |
-
lr_scheduler_fn:
|
63 |
-
_target_: torch.optim.lr_scheduler.ConstantLR
|
64 |
-
_partial_: True
|
65 |
-
factor: 1.0
|
66 |
-
swa_params:
|
67 |
-
_target_: torchtnt.framework.auto_unit.SWAParams
|
68 |
-
warmup_steps_or_epochs: 0
|
69 |
-
step_or_epoch_update_freq: 1
|
70 |
-
averaging_method: ema
|
71 |
-
ema_decay: 0.9999
|
72 |
-
use_lit: True
|
73 |
-
precision: ${constants.precision}
|
74 |
-
clip_grad_norm: 1.0
|
75 |
-
|
76 |
-
umm_metadata:
|
77 |
-
model_type_name: ads_genads_ldm
|
78 |
-
model_series_name: ads_genads_ldm
|
79 |
-
oncall: ai_genads
|
80 |
-
|
81 |
-
checkpoint:
|
82 |
-
checkpoint_dir: null
|
83 |
-
checkpoint_path: null
|
84 |
-
checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/train_local.yaml
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
## Starter configuration to be ran locally for testing purpose.
|
2 |
-
|
3 |
-
defaults:
|
4 |
-
- train
|
5 |
-
- _self_
|
6 |
-
|
7 |
-
trainer:
|
8 |
-
resume_from_last_ckpt: false
|
9 |
-
model_entity_id: null
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/train_mae.yaml
DELETED
@@ -1,159 +0,0 @@
|
|
1 |
-
constants:
|
2 |
-
img_size: 512
|
3 |
-
batch_size: 16
|
4 |
-
precision: fp32
|
5 |
-
max_epochs: 1000
|
6 |
-
max_steps: null
|
7 |
-
max_train_steps_per_epoch: null
|
8 |
-
evaluate_every_n_train_steps: null
|
9 |
-
evaluate_every_n_train_epochs: 10
|
10 |
-
max_eval_steps_per_eval_epoch: null
|
11 |
-
use_torchsnapshot: false
|
12 |
-
checkpoint_every_n_steps: 500
|
13 |
-
model:
|
14 |
-
_target_: leffa.models.idm_vton_model.Mae4BgGen
|
15 |
-
img_size: ${constants.img_size}
|
16 |
-
patch_size: 16
|
17 |
-
embed_dim: 1024
|
18 |
-
depth: 24
|
19 |
-
num_heads: 16
|
20 |
-
# pretrained_path: manifold://genads_models/tree/zijianzhou/model/mae/mae_pretrain_vit_large.pth
|
21 |
-
pretrained_path: null
|
22 |
-
bg_masking_type: min
|
23 |
-
trainer:
|
24 |
-
max_epochs: ${constants.max_epochs}
|
25 |
-
max_steps: ${constants.max_steps}
|
26 |
-
max_train_steps_per_epoch: ${constants.max_train_steps_per_epoch}
|
27 |
-
checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
|
28 |
-
model_entity_id: null
|
29 |
-
resume_from_last_ckpt: true
|
30 |
-
model_store_checkpoint_version: null
|
31 |
-
garbage_collector_interval: 5001
|
32 |
-
pretrained_weights: null
|
33 |
-
log_dir: manifold://fblearner_flow_run_metrics/tree/torchmultimodal/idm_vton/logs/
|
34 |
-
use_pt2: false
|
35 |
-
memory_snapshot: false
|
36 |
-
eval:
|
37 |
-
warmup_iters: 0
|
38 |
-
evaluate_every_n_train_steps: ${constants.evaluate_every_n_train_steps}
|
39 |
-
evaluate_every_n_train_epochs: ${constants.evaluate_every_n_train_epochs}
|
40 |
-
max_eval_steps_per_eval_epoch: ${constants.max_eval_steps_per_eval_epoch}
|
41 |
-
datasets:
|
42 |
-
mae_train:
|
43 |
-
dataset:
|
44 |
-
_target_: media_dataloader.api.EnrichingDataset
|
45 |
-
datasource:
|
46 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
47 |
-
namespace: ad_metrics
|
48 |
-
table: hybrid_3_0_1st_shein_data
|
49 |
-
partition_filter_predicate_list:
|
50 |
-
- ds = '2024-07-20'
|
51 |
-
enrichments:
|
52 |
-
- _target_: media_dataloader.api.media_lookups.EverstoreLookups
|
53 |
-
lookup_handle_to_media_columns:
|
54 |
-
everstore_handle: "image"
|
55 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
56 |
-
lookup_handle_to_media_columns:
|
57 |
-
binary_mask_manifold_path: bg_mask
|
58 |
-
collate_fn:
|
59 |
-
- _target_: media_dataloader.api.Collate
|
60 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
61 |
-
image_field: image
|
62 |
-
blob_field: image
|
63 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
64 |
-
image_field: bg_mask
|
65 |
-
blob_field: bg_mask
|
66 |
-
- _target_: leffa.datasets.transform.MaeTransform
|
67 |
-
input_size: ${constants.img_size}
|
68 |
-
is_train: true
|
69 |
-
dataloader:
|
70 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
71 |
-
dataset: ${datasets.mae_train.dataset}
|
72 |
-
batch_size: ${constants.batch_size}
|
73 |
-
num_workers: 8
|
74 |
-
prefetch_factor: 2
|
75 |
-
pin_memory: true
|
76 |
-
persistent_workers: true
|
77 |
-
multiprocessing_context: forkserver
|
78 |
-
mae_test:
|
79 |
-
dataset:
|
80 |
-
_target_: media_dataloader.api.EnrichingDataset
|
81 |
-
datasource:
|
82 |
-
_target_: media_dataloader.api.LazyHiveDataSource
|
83 |
-
namespace: ad_metrics
|
84 |
-
table: hybrid_3_0_1st_shein_data
|
85 |
-
partition_filter_predicate_list:
|
86 |
-
- ds = '2024-07-20'
|
87 |
-
enrichments:
|
88 |
-
- _target_: media_dataloader.api.media_lookups.EverstoreLookups
|
89 |
-
lookup_handle_to_media_columns:
|
90 |
-
everstore_handle: "image"
|
91 |
-
- _target_: media_dataloader.api.media_lookups.ManifoldLookups
|
92 |
-
lookup_handle_to_media_columns:
|
93 |
-
binary_mask_manifold_path: bg_mask
|
94 |
-
collate_fn:
|
95 |
-
- _target_: media_dataloader.api.Collate
|
96 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
97 |
-
image_field: image
|
98 |
-
blob_field: image
|
99 |
-
- _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
|
100 |
-
image_field: bg_mask
|
101 |
-
blob_field: bg_mask
|
102 |
-
- _target_: leffa.datasets.transform.MaeTransform
|
103 |
-
input_size: ${constants.img_size}
|
104 |
-
is_train: false
|
105 |
-
dataloader:
|
106 |
-
_target_: media_dataloader.api.StatefulDataLoader
|
107 |
-
dataset: ${datasets.mae_train.dataset}
|
108 |
-
batch_size: ${constants.batch_size}
|
109 |
-
num_workers: 0
|
110 |
-
prefetch_factor: null
|
111 |
-
pin_memory: true
|
112 |
-
persistent_workers: false
|
113 |
-
multiprocessing_context: null
|
114 |
-
seed: 42
|
115 |
-
train_dataset: ${datasets.mae_train}
|
116 |
-
eval_dataset: null
|
117 |
-
# eval_dataset: ${datasets.mae_test}
|
118 |
-
unit:
|
119 |
-
_target_: leffa.vton_unit.VtonUnit
|
120 |
-
_partial_: true
|
121 |
-
model: ${model}
|
122 |
-
strategy: ddp
|
123 |
-
# strategy:
|
124 |
-
# _target_: leffa.utils.create_fsdp_strategy
|
125 |
-
# sharding_strategy: FULL_SHARD
|
126 |
-
# state_dict_type: SHARDED_STATE_DICT
|
127 |
-
# class_paths:
|
128 |
-
# - leffa.models.idm_vton_model.MaskedAutoencoderViT
|
129 |
-
optim_fn:
|
130 |
-
_target_: torch.optim.AdamW
|
131 |
-
_partial_: true
|
132 |
-
lr: 1.0e-05
|
133 |
-
betas:
|
134 |
-
- 0.9
|
135 |
-
- 0.999
|
136 |
-
eps: 1.0e-08
|
137 |
-
weight_decay: 0.01
|
138 |
-
amsgrad: false
|
139 |
-
lr_scheduler_fn:
|
140 |
-
_target_: torch.optim.lr_scheduler.ConstantLR
|
141 |
-
_partial_: true
|
142 |
-
factor: 1.0
|
143 |
-
swa_params:
|
144 |
-
_target_: torchtnt.framework.auto_unit.SWAParams
|
145 |
-
warmup_steps_or_epochs: 0
|
146 |
-
step_or_epoch_update_freq: 1
|
147 |
-
averaging_method: ema
|
148 |
-
ema_decay: 0.9999
|
149 |
-
use_lit: true
|
150 |
-
precision: ${constants.precision}
|
151 |
-
clip_grad_norm: 1.0
|
152 |
-
umm_metadata:
|
153 |
-
model_type_name: ads_genads_ldm
|
154 |
-
model_series_name: ads_genads_ldm
|
155 |
-
oncall: ai_genads
|
156 |
-
checkpoint:
|
157 |
-
checkpoint_dir: null
|
158 |
-
checkpoint_path: null
|
159 |
-
checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
leffa/conf/trainer/base.yaml
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
max_epochs: ${constants.max_epochs}
|
2 |
-
max_steps: ${constants.max_steps}
|
3 |
-
max_train_steps_per_epoch: ${constants.max_train_steps_per_epoch}
|
4 |
-
|
5 |
-
checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
|
6 |
-
|
7 |
-
resume_from_last_ckpt: True
|
8 |
-
model_entity_id: null
|
9 |
-
model_store_checkpoint_version: null
|
10 |
-
garbage_collector_interval: 5001
|
11 |
-
pretrained_weights: null
|
12 |
-
log_dir: manifold://fblearner_flow_run_metrics/tree/torchmultimodal/idm_vton/logs
|
13 |
-
use_pt2: False
|
14 |
-
memory_snapshot: False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|