{ "_name_or_path": "ntu-spml/distilhubert", "activation_dropout": 0.1, "apply_spec_augment": false, "architectures": [ "HubertForSequenceClassification" ], "attention_dropout": 0.1, "bos_token_id": 1, "classifier_proj_size": 256, "conv_bias": false, "conv_dim": [ 512, 512, 512, 512, 512, 512, 512 ], "conv_kernel": [ 10, 3, 3, 3, 3, 2, 2 ], "conv_stride": [ 5, 2, 2, 2, 2, 2, 2 ], "ctc_loss_reduction": "sum", "ctc_zero_infinity": false, "do_stable_layer_norm": false, "eos_token_id": 2, "feat_extract_activation": "gelu", "feat_extract_norm": "group", "feat_proj_dropout": 0.0, "feat_proj_layer_norm": false, "final_dropout": 0.0, "hidden_act": "gelu", "hidden_dropout": 0.1, "hidden_size": 768, "id2label": { "0": "note0", "1": "note1", "2": "note2", "3": "note3", "4": "note4", "5": "note5", "6": "note6", "7": "note7", "8": "note8", "9": "note9", "10": "note10", "11": "note11", "12": "note12", "13": "note13", "14": "note14", "15": "note15", "16": "note16", "17": "note17", "18": "note18", "19": "note19", "20": "note20", "21": "note21", "22": "note22", "23": "note23", "24": "note24", "25": "note25", "26": "note26", "27": "note27", "28": "note28", "29": "note29", "30": "note30", "31": "note31", "32": "note32", "33": "note33", "34": "note34", "35": "note35", "36": "note36", "37": "note37", "38": "note38", "39": "note39", "40": "note40", "41": "note41", "42": "note42", "43": "note43", "44": "note44", "45": "note45", "46": "note46", "47": "note47", "48": "note48", "49": "note49", "50": "note50", "51": "note51", "52": "note52", "53": "note53", "54": "note54", "55": "note55", "56": "note56", "57": "note57", "58": "note58", "59": "note59", "60": "note60", "61": "note61", "62": "note62", "63": "note63", "64": "note64", "65": "note65", "66": "note66", "67": "note67", "68": "note68", "69": "note69", "70": "note70", "71": "note71", "72": "note72", "73": "note73", "74": "note74", "75": "note75", "76": "note76", "77": "note77", "78": "note78", "79": "note79", "80": "note80", "81": "note81", "82": "note82", "83": "note83", "84": "note84", "85": "note85", "86": "note86", "87": "note87", "88": "note88", "89": "note89", "90": "note90", "91": "note91", "92": "note92", "93": "note93", "94": "note94", "95": "note95", "96": "note96", "97": "note97", "98": "note98", "99": "note99", "100": "note100", "101": "note101", "102": "note102", "103": "note103", "104": "note104", "105": "note105", "106": "note106", "107": "note107", "108": "note108", "109": "note109", "110": "note110", "111": "note111", "112": "note112", "113": "note113", "114": "note114", "115": "note115", "116": "note116", "117": "note117", "118": "note118", "119": "note119", "120": "note120", "121": "note121", "122": "note122", "123": "note123", "124": "note124", "125": "note125", "126": "note126", "127": "note127", "128": "velocity1", "129": "velocity2", "130": "velocity3", "131": "velocity4", "132": "velocity5", "133": "velocity6", "134": "velocity7", "135": "velocity8", "136": "velocity9", "137": "velocity10", "138": "timbre0", "139": "timbre1", "140": "timbre2", "141": "timbre3", "142": "timbre4", "143": "timbre5", "144": "timbre6", "145": "timbre7", "146": "timbre8", "147": "timbre9", "148": "timbre10", "149": "timbre11", "150": "timbre12", "151": "timbre13", "152": "timbre14", "153": "timbre15" }, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "note0": 0, "note1": 1, "note10": 10, "note100": 100, "note101": 101, "note102": 102, "note103": 103, "note104": 104, "note105": 105, "note106": 106, "note107": 107, "note108": 108, "note109": 109, "note11": 11, "note110": 110, "note111": 111, "note112": 112, "note113": 113, "note114": 114, "note115": 115, "note116": 116, "note117": 117, "note118": 118, "note119": 119, "note12": 12, "note120": 120, "note121": 121, "note122": 122, "note123": 123, "note124": 124, "note125": 125, "note126": 126, "note127": 127, "note13": 13, "note14": 14, "note15": 15, "note16": 16, "note17": 17, "note18": 18, "note19": 19, "note2": 2, "note20": 20, "note21": 21, "note22": 22, "note23": 23, "note24": 24, "note25": 25, "note26": 26, "note27": 27, "note28": 28, "note29": 29, "note3": 3, "note30": 30, "note31": 31, "note32": 32, "note33": 33, "note34": 34, "note35": 35, "note36": 36, "note37": 37, "note38": 38, "note39": 39, "note4": 4, "note40": 40, "note41": 41, "note42": 42, "note43": 43, "note44": 44, "note45": 45, "note46": 46, "note47": 47, "note48": 48, "note49": 49, "note5": 5, "note50": 50, "note51": 51, "note52": 52, "note53": 53, "note54": 54, "note55": 55, "note56": 56, "note57": 57, "note58": 58, "note59": 59, "note6": 6, "note60": 60, "note61": 61, "note62": 62, "note63": 63, "note64": 64, "note65": 65, "note66": 66, "note67": 67, "note68": 68, "note69": 69, "note7": 7, "note70": 70, "note71": 71, "note72": 72, "note73": 73, "note74": 74, "note75": 75, "note76": 76, "note77": 77, "note78": 78, "note79": 79, "note8": 8, "note80": 80, "note81": 81, "note82": 82, "note83": 83, "note84": 84, "note85": 85, "note86": 86, "note87": 87, "note88": 88, "note89": 89, "note9": 9, "note90": 90, "note91": 91, "note92": 92, "note93": 93, "note94": 94, "note95": 95, "note96": 96, "note97": 97, "note98": 98, "note99": 99, "timbre0": 138, "timbre1": 139, "timbre10": 148, "timbre11": 149, "timbre12": 150, "timbre13": 151, "timbre14": 152, "timbre15": 153, "timbre2": 140, "timbre3": 141, "timbre4": 142, "timbre5": 143, "timbre6": 144, "timbre7": 145, "timbre8": 146, "timbre9": 147, "velocity1": 128, "velocity10": 137, "velocity2": 129, "velocity3": 130, "velocity4": 131, "velocity5": 132, "velocity6": 133, "velocity7": 134, "velocity8": 135, "velocity9": 136 }, "layer_norm_eps": 1e-05, "layerdrop": 0.0, "mask_feature_length": 10, "mask_feature_min_masks": 0, "mask_feature_prob": 0.0, "mask_time_length": 10, "mask_time_min_masks": 2, "mask_time_prob": 0.05, "model_type": "hubert", "num_attention_heads": 12, "num_conv_pos_embedding_groups": 16, "num_conv_pos_embeddings": 128, "num_feat_extract_layers": 7, "num_hidden_layers": 2, "pad_token_id": 0, "problem_type": "multi_label_classification", "torch_dtype": "float32", "transformers_version": "4.41.2", "use_weighted_layer_sum": false, "vocab_size": 32 }