# Training args
model_name_or_path: openchat/openchat-3.5-0106
torch_dtype: bfloat16
use_lora: true
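# 4-bit (QLoRA-style) quantization of the frozen base model during training;
# quantization_inference: null presumably leaves inference un-quantized
# (field semantics assumed from the accompanying training script).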
quantization: 4
quantization_inference: null
gradient_checkpointing: true
force_auto_device_map: false
use_flash_attention: true
generation_config: generation_config.json
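# Generation stops when any of these strings is emitted; they match the
# OpenChat-3.5 "GPT4 Correct" chat-template delimiters and special tokens.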
stop_words:
  - "<|end_of_turn|>"
  - "GPT4 Correct User:"
  - "GPT4 Correct Assistant:"
  - "</s>"
  - "<s>"
  - "\\n"
# dataset arguments
train_datasets:
  - train
validation_datasets:
  - validation
test_datasets:
  - test
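# 8192 tokens for both input and generation, matching the 8K context
# window of the Mistral-7B-based OpenChat-3.5 model.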
max_seq_length: 8192
generation_max_length: 8192
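# A weight of 0.0 masks prompt tokens out of the loss, so only the
# assistant's answer tokens contribute to training (assuming the trainer
# uses prompt_loss_weight to scale the prompt-token loss).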
prompt_loss_weight: 0.0
# checkpoint settings
output_dir: results/finetune/openchat-3.5-0106_Lora
overwrite_output_dir: true
load_best_model_at_end: false
metric_for_best_model: eval_validation_predictions_validation/rouge
greater_is_better: true
save_strategy: "epoch"
save_only_model: true
save_total_limit: 1
# evaluation
do_train: true
do_eval: true
do_predict: true
evaluation_strategy: "epoch"
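# predict_with_generate runs generate() during evaluation so text metrics
# such as ROUGE can be computed on the decoded outputs.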
predict_with_generate: true
evaluate_all_checkpoints: true
# effective batch size: 8 per-device batch * 8 gradient-accumulation steps = 64 per GPU
per_device_train_batch_size: 8
per_device_eval_batch_size: 4
gradient_accumulation_steps: 8
generation_num_beams: 1
# optimizer settings
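# Fused AdamW with cosine decay and 10% linear warmup; the betas/epsilon
# below override the usual AdamW defaults of 0.9 / 0.999 / 1e-8.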
optim: adamw_torch_fused
learning_rate: 0.0003
weight_decay: 0.001
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.1
adam_beta1: 0.9
adam_beta2: 0.95
adam_epsilon: 1e-12
# lora settings
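# Rank 128 with alpha = 2*r (effective LoRA scaling alpha/r = 2);
# "all" is assumed to target every linear projection layer, per this
# trainer's convention.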
lora_r: 128
lora_alpha: 256
lora_dropout: 0.05
lora_target_modules:
  - all
# reporting
logging_strategy: steps
logging_first_step: true
logging_steps: 5
report_to: wandb
run_name: "openchat-3.5-0106_Lora"
disable_tqdm: false
# hub settings
push_to_hub: false
resume_from_checkpoint: false
# performance
bf16: true
fp16: false
torch_compile: false
ddp_find_unused_parameters: false
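# Example launch (hypothetical entry-point script and config path; adjust
# to the repo's actual layout):
#   accelerate launch --num_processes 2 src/train.py configs/openchat-3.5-0106_Lora.yaml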