{
"_name_or_path": "ntu-spml/distilhubert",
"activation_dropout": 0.1,
"apply_spec_augment": false,
"architectures": [
"HubertForSequenceClassification"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"classifier_proj_size": 256,
"conv_bias": false,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"do_stable_layer_norm": false,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_norm": "group",
"feat_proj_dropout": 0.0,
"feat_proj_layer_norm": false,
"final_dropout": 0.0,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 768,
"id2label": {
"0": "note0",
"1": "note1",
"2": "note2",
"3": "note3",
"4": "note4",
"5": "note5",
"6": "note6",
"7": "note7",
"8": "note8",
"9": "note9",
"10": "note10",
"11": "note11",
"12": "note12",
"13": "note13",
"14": "note14",
"15": "note15",
"16": "note16",
"17": "note17",
"18": "note18",
"19": "note19",
"20": "note20",
"21": "note21",
"22": "note22",
"23": "note23",
"24": "note24",
"25": "note25",
"26": "note26",
"27": "note27",
"28": "note28",
"29": "note29",
"30": "note30",
"31": "note31",
"32": "note32",
"33": "note33",
"34": "note34",
"35": "note35",
"36": "note36",
"37": "note37",
"38": "note38",
"39": "note39",
"40": "note40",
"41": "note41",
"42": "note42",
"43": "note43",
"44": "note44",
"45": "note45",
"46": "note46",
"47": "note47",
"48": "note48",
"49": "note49",
"50": "note50",
"51": "note51",
"52": "note52",
"53": "note53",
"54": "note54",
"55": "note55",
"56": "note56",
"57": "note57",
"58": "note58",
"59": "note59",
"60": "note60",
"61": "note61",
"62": "note62",
"63": "note63",
"64": "note64",
"65": "note65",
"66": "note66",
"67": "note67",
"68": "note68",
"69": "note69",
"70": "note70",
"71": "note71",
"72": "note72",
"73": "note73",
"74": "note74",
"75": "note75",
"76": "note76",
"77": "note77",
"78": "note78",
"79": "note79",
"80": "note80",
"81": "note81",
"82": "note82",
"83": "note83",
"84": "note84",
"85": "note85",
"86": "note86",
"87": "note87",
"88": "note88",
"89": "note89",
"90": "note90",
"91": "note91",
"92": "note92",
"93": "note93",
"94": "note94",
"95": "note95",
"96": "note96",
"97": "note97",
"98": "note98",
"99": "note99",
"100": "note100",
"101": "note101",
"102": "note102",
"103": "note103",
"104": "note104",
"105": "note105",
"106": "note106",
"107": "note107",
"108": "note108",
"109": "note109",
"110": "note110",
"111": "note111",
"112": "note112",
"113": "note113",
"114": "note114",
"115": "note115",
"116": "note116",
"117": "note117",
"118": "note118",
"119": "note119",
"120": "note120",
"121": "note121",
"122": "note122",
"123": "note123",
"124": "note124",
"125": "note125",
"126": "note126",
"127": "note127",
"128": "velocity1",
"129": "velocity2",
"130": "velocity3",
"131": "velocity4",
"132": "velocity5",
"133": "velocity6",
"134": "velocity7",
"135": "velocity8",
"136": "velocity9",
"137": "velocity10",
"138": "timbre0",
"139": "timbre1",
"140": "timbre2",
"141": "timbre3",
"142": "timbre4",
"143": "timbre5",
"144": "timbre6",
"145": "timbre7",
"146": "timbre8",
"147": "timbre9",
"148": "timbre10",
"149": "timbre11",
"150": "timbre12",
"151": "timbre13",
"152": "timbre14",
"153": "timbre15"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"note0": 0,
"note1": 1,
"note10": 10,
"note100": 100,
"note101": 101,
"note102": 102,
"note103": 103,
"note104": 104,
"note105": 105,
"note106": 106,
"note107": 107,
"note108": 108,
"note109": 109,
"note11": 11,
"note110": 110,
"note111": 111,
"note112": 112,
"note113": 113,
"note114": 114,
"note115": 115,
"note116": 116,
"note117": 117,
"note118": 118,
"note119": 119,
"note12": 12,
"note120": 120,
"note121": 121,
"note122": 122,
"note123": 123,
"note124": 124,
"note125": 125,
"note126": 126,
"note127": 127,
"note13": 13,
"note14": 14,
"note15": 15,
"note16": 16,
"note17": 17,
"note18": 18,
"note19": 19,
"note2": 2,
"note20": 20,
"note21": 21,
"note22": 22,
"note23": 23,
"note24": 24,
"note25": 25,
"note26": 26,
"note27": 27,
"note28": 28,
"note29": 29,
"note3": 3,
"note30": 30,
"note31": 31,
"note32": 32,
"note33": 33,
"note34": 34,
"note35": 35,
"note36": 36,
"note37": 37,
"note38": 38,
"note39": 39,
"note4": 4,
"note40": 40,
"note41": 41,
"note42": 42,
"note43": 43,
"note44": 44,
"note45": 45,
"note46": 46,
"note47": 47,
"note48": 48,
"note49": 49,
"note5": 5,
"note50": 50,
"note51": 51,
"note52": 52,
"note53": 53,
"note54": 54,
"note55": 55,
"note56": 56,
"note57": 57,
"note58": 58,
"note59": 59,
"note6": 6,
"note60": 60,
"note61": 61,
"note62": 62,
"note63": 63,
"note64": 64,
"note65": 65,
"note66": 66,
"note67": 67,
"note68": 68,
"note69": 69,
"note7": 7,
"note70": 70,
"note71": 71,
"note72": 72,
"note73": 73,
"note74": 74,
"note75": 75,
"note76": 76,
"note77": 77,
"note78": 78,
"note79": 79,
"note8": 8,
"note80": 80,
"note81": 81,
"note82": 82,
"note83": 83,
"note84": 84,
"note85": 85,
"note86": 86,
"note87": 87,
"note88": 88,
"note89": 89,
"note9": 9,
"note90": 90,
"note91": 91,
"note92": 92,
"note93": 93,
"note94": 94,
"note95": 95,
"note96": 96,
"note97": 97,
"note98": 98,
"note99": 99,
"timbre0": 138,
"timbre1": 139,
"timbre10": 148,
"timbre11": 149,
"timbre12": 150,
"timbre13": 151,
"timbre14": 152,
"timbre15": 153,
"timbre2": 140,
"timbre3": 141,
"timbre4": 142,
"timbre5": 143,
"timbre6": 144,
"timbre7": 145,
"timbre8": 146,
"timbre9": 147,
"velocity1": 128,
"velocity10": 137,
"velocity2": 129,
"velocity3": 130,
"velocity4": 131,
"velocity5": 132,
"velocity6": 133,
"velocity7": 134,
"velocity8": 135,
"velocity9": 136
},
"layer_norm_eps": 1e-05,
"layerdrop": 0.0,
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_prob": 0.05,
"model_type": "hubert",
"num_attention_heads": 12,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 2,
"pad_token_id": 0,
"problem_type": "multi_label_classification",
"torch_dtype": "float32",
"transformers_version": "4.41.2",
"use_weighted_layer_sum": false,
"vocab_size": 32
}