# Provenance (captured from the Hugging Face Spaces file viewer):
#   Space:  TMElyralab/MuseTalk (Space status at capture time: "Runtime error")
#   File:   MuseTalk/musetalk/utils/dwpose/rtmpose-l_8xb32-270e_coco-ubody-wholebody-384x288.py
#   Commit: cdee5b8 (verified) - "Upload 68 files" by czk32611, 7 months before capture
#   Size:   7.36 kB

	# Base config. The upstream path was '../../../_base_/default_runtime.py';
	# this copy refers to 'default_runtime.py' by a bare relative path instead.
	_base_ = ['default_runtime.py']

	# runtime
	max_epochs = 270
	stage2_num_epochs = 30
	base_lr = 4e-3
	train_batch_size = 32
	val_batch_size = 32

	# validate every 10 epochs; fixed seed
	train_cfg = dict(
	    max_epochs=max_epochs,
	    val_interval=10,
	)
	randomness = dict(seed=21)

	# optimizer
	optim_wrapper = dict(
	    type='OptimWrapper',
	    optimizer=dict(
	        type='AdamW',
	        lr=base_lr,
	        weight_decay=0.05,
	    ),
	    paramwise_cfg=dict(
	        norm_decay_mult=0,
	        bias_decay_mult=0,
	        bypass_duplicate=True,
	    ),
	)

	# learning rate
	param_scheduler = [
	    # iteration-based linear schedule spanning iterations 0 to 1000
	    dict(
	        type='LinearLR',
	        start_factor=1.0e-5,
	        by_epoch=False,
	        begin=0,
	        end=1000,
	    ),
	    # cosine schedule over the second half of training: from epoch
	    # max_epochs // 2 (135) to max_epochs (270), with a floor of
	    # 5% of base_lr
	    dict(
	        type='CosineAnnealingLR',
	        eta_min=base_lr * 0.05,
	        begin=max_epochs // 2,
	        end=max_epochs,
	        T_max=max_epochs // 2,
	        by_epoch=True,
	        convert_to_iter_based=True,
	    ),
	]

	# automatically scaling LR based on the actual training batch size
	auto_scale_lr = dict(base_batch_size=512)

	# codec settings
	# One dict shared by the head (input size, split ratio, decoder) and by the
	# data pipelines (affine target size, target encoder).
	codec = dict(
	    type='SimCCLabel',
	    input_size=(288, 384),
	    sigma=(6., 6.93),
	    simcc_split_ratio=2.0,
	    normalize=False,
	    use_dark=False,
	)

	# model settings
	model = dict(
	    type='TopdownPoseEstimator',
	    data_preprocessor=dict(
	        type='PoseDataPreprocessor',
	        mean=[123.675, 116.28, 103.53],
	        std=[58.395, 57.12, 57.375],
	        bgr_to_rgb=True,
	    ),
	    backbone=dict(
	        # the backbone type is looked up in the 'mmdet' scope
	        _scope_='mmdet',
	        type='CSPNeXt',
	        arch='P5',
	        expand_ratio=0.5,
	        deepen_factor=1.,
	        widen_factor=1.,
	        out_indices=(4, ),
	        channel_attention=True,
	        norm_cfg=dict(type='SyncBN'),
	        act_cfg=dict(type='SiLU'),
	        init_cfg=dict(
	            type='Pretrained',
	            prefix='backbone.',
	            # URL is split across two adjacent literals (implicit
	            # concatenation)
	            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
	            'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth',  # noqa: E501
	        ),
	    ),
	    head=dict(
	        type='RTMCCHead',
	        in_channels=1024,
	        out_channels=133,
	        input_size=codec['input_size'],
	        # numerically input_size / 32 per dimension (288 / 32, 384 / 32)
	        in_featuremap_size=(9, 12),
	        simcc_split_ratio=codec['simcc_split_ratio'],
	        final_layer_kernel_size=7,
	        gau_cfg=dict(
	            hidden_dims=256,
	            s=128,
	            expansion_factor=2,
	            dropout_rate=0.,
	            drop_path=0.,
	            act_fn='SiLU',
	            use_rel_bias=False,
	            pos_enc=False,
	        ),
	        loss=dict(
	            type='KLDiscretLoss',
	            use_target_weight=True,
	            beta=10.,
	            label_softmax=True,
	        ),
	        # the very same codec dict object is handed to the head as decoder
	        decoder=codec,
	    ),
	    test_cfg=dict(flip_test=True, ),
	)

	# base dataset settings
	dataset_type = 'UBody2dDataset'
	data_mode = 'topdown'
	data_root = 'data/UBody/'

	backend_args = dict(backend='local')

	# Scene names; each one selects its own training annotation file below.
	scenes = [
	    'Magic_show', 'Entertainment', 'ConductMusic', 'Online_class', 'TalkShow',
	    'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow', 'Singing',
	    'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference'
	]

	# Training sources: the COCO-WholeBody entry comes first, then one
	# `dataset_type` entry per scene is appended by the loop below. Every entry
	# has an empty per-dataset pipeline.
	train_datasets = [
	    dict(
	        type='CocoWholeBodyDataset',
	        data_root='data/coco/',
	        data_mode=data_mode,
	        ann_file='annotations/coco_wholebody_train_v1.0.json',
	        data_prefix=dict(img='train2017/'),
	        pipeline=[])
	]

	# The loop body must be indented under the `for` header; without that the
	# file does not parse. A plain loop (not a comprehension) is kept on purpose
	# so that `scene` and `train_dataset` remain module-level names, exactly as
	# before.
	for scene in scenes:
	    train_dataset = dict(
	        type=dataset_type,
	        data_root=data_root,
	        data_mode=data_mode,
	        ann_file=f'annotations/{scene}/train_annotations.json',
	        data_prefix=dict(img='images/'),
	        pipeline=[],
	        sample_interval=10)
	    train_datasets.append(train_dataset)

	# pipelines
	# Training transforms, listed in the order they appear in the pipeline.
	train_pipeline = [
	    dict(type='LoadImage', backend_args=backend_args),
	    dict(type='GetBBoxCenterScale'),
	    dict(type='RandomFlip', direction='horizontal'),
	    dict(type='RandomHalfBody'),
	    dict(
	        type='RandomBBoxTransform',
	        scale_factor=[0.5, 1.5],
	        rotate_factor=90,
	    ),
	    # affine target size comes from the codec's input_size
	    dict(type='TopdownAffine', input_size=codec['input_size']),
	    dict(type='mmdet.YOLOXHSVRandomAug'),
	    dict(
	        type='Albumentation',
	        transforms=[
	            dict(type='Blur', p=0.1),
	            dict(type='MedianBlur', p=0.1),
	            # p=1.0 here; the stage-2 pipeline uses p=0.5 for this entry
	            dict(
	                type='CoarseDropout',
	                max_holes=1,
	                max_height=0.4,
	                max_width=0.4,
	                min_holes=1,
	                min_height=0.2,
	                min_width=0.2,
	                p=1.0,
	            ),
	        ],
	    ),
	    # targets are encoded with the same codec dict the head decodes with
	    dict(type='GenerateTarget', encoder=codec),
	    dict(type='PackPoseInputs'),
	]
	# Validation transforms: load, bbox center/scale, affine to the codec's
	# input size, pack. None of the random transforms from the training
	# pipeline are listed here.
	val_pipeline = [
	    dict(type='LoadImage', backend_args=backend_args),
	    dict(type='GetBBoxCenterScale'),
	    dict(type='TopdownAffine', input_size=codec['input_size']),
	    dict(type='PackPoseInputs'),
	]

	# Second-stage training transforms. Compared with `train_pipeline`, the
	# differences visible in this file are `shift_factor=0.` on
	# RandomBBoxTransform and `p=0.5` (instead of 1.0) on CoarseDropout.
	train_pipeline_stage2 = [
	    dict(type='LoadImage', backend_args=backend_args),
	    dict(type='GetBBoxCenterScale'),
	    dict(type='RandomFlip', direction='horizontal'),
	    dict(type='RandomHalfBody'),
	    dict(
	        type='RandomBBoxTransform',
	        shift_factor=0.,
	        scale_factor=[0.5, 1.5],
	        rotate_factor=90,
	    ),
	    dict(type='TopdownAffine', input_size=codec['input_size']),
	    dict(type='mmdet.YOLOXHSVRandomAug'),
	    dict(
	        type='Albumentation',
	        transforms=[
	            dict(type='Blur', p=0.1),
	            dict(type='MedianBlur', p=0.1),
	            dict(
	                type='CoarseDropout',
	                max_holes=1,
	                max_height=0.4,
	                max_width=0.4,
	                min_holes=1,
	                min_height=0.2,
	                min_width=0.2,
	                p=0.5,
	            ),
	        ],
	    ),
	    dict(type='GenerateTarget', encoder=codec),
	    dict(type='PackPoseInputs'),
	]

	# data loaders
	# Training loader: shuffled, wrapping all entries of `train_datasets` in a
	# single CombinedDataset that applies `train_pipeline`.
	train_dataloader = dict(
	    batch_size=train_batch_size,
	    num_workers=10,
	    persistent_workers=True,
	    sampler=dict(type='DefaultSampler', shuffle=True),
	    dataset=dict(
	        type='CombinedDataset',
	        # meta information is read from the COCO-WholeBody dataset config
	        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
	        datasets=train_datasets,
	        pipeline=train_pipeline,
	        test_mode=False,
	    ),
	)

	# Validation loader: unshuffled, keeps the last partial batch.
	# NOTE(review): `data_root` is the UBody root defined earlier in this file
	# ('data/UBody/'), while `ann_file` / `bbox_file` begin with 'data/coco/'
	# and the image prefix with 'coco/'. Confirm how the dataset class combines
	# these paths before relying on this loader.
	val_dataloader = dict(
	    batch_size=val_batch_size,
	    num_workers=10,
	    persistent_workers=True,
	    drop_last=False,
	    sampler=dict(
	        type='DefaultSampler',
	        shuffle=False,
	        round_up=False,
	    ),
	    dataset=dict(
	        type='CocoWholeBodyDataset',
	        data_root=data_root,
	        data_mode=data_mode,
	        ann_file='data/coco/annotations/coco_wholebody_val_v1.0.json',
	        bbox_file='data/coco/person_detection_results/'
	        'COCO_val2017_detections_AP_H_56_person.json',
	        data_prefix=dict(img='coco/val2017/'),
	        test_mode=True,
	        pipeline=val_pipeline,
	    ),
	)
	# testing reuses the validation loader (same dict object, not a copy)
	test_dataloader = val_dataloader

	# hooks
	# Checkpoint hook settings: best-checkpoint selection is keyed on
	# 'coco-wholebody/AP' with rule 'greater'; max_keep_ckpts is 1.
	default_hooks = dict(
	    checkpoint=dict(
	        save_best='coco-wholebody/AP',
	        rule='greater',
	        max_keep_ckpts=1,
	    ),
	)

	custom_hooks = [
	    # EMA hook settings
	    dict(
	        type='EMAHook',
	        ema_type='ExpMomentumEMA',
	        momentum=0.0002,
	        update_buffers=True,
	        priority=49,
	    ),
	    # Swap to `train_pipeline_stage2` at epoch
	    # max_epochs - stage2_num_epochs (270 - 30 = 240 with the values set
	    # in this file).
	    dict(
	        type='mmdet.PipelineSwitchHook',
	        switch_epoch=max_epochs - stage2_num_epochs,
	        switch_pipeline=train_pipeline_stage2,
	    ),
	]

	# evaluators
	# The metric reads the COCO-WholeBody validation annotations; testing
	# reuses the validation evaluator (same dict object, not a copy).
	val_evaluator = dict(
	    type='CocoWholeBodyMetric',
	    ann_file='data/coco/annotations/coco_wholebody_val_v1.0.json',
	)
	test_evaluator = val_evaluator