{ "_name_or_path": "MCG-NJU/videomae-base", "architectures": [ "VideoMAEForVideoClassification" ], "attention_probs_dropout_prob": 0.0, "decoder_hidden_size": 384, "decoder_intermediate_size": 1536, "decoder_num_attention_heads": 6, "decoder_num_hidden_layers": 4, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "Reject", "1": "Pass", "2": "Invisible", "3": "Absent", "4": "Torture", "5": "Permission", "6": "Dishonest", "7": "Insult", "8": "Complain", "9": "Hit", "10": "Save", "11": "Bring", "12": "Request", "13": "Start", "14": "Seperate", "15": "Talk", "16": "Turn off the light", "17": "Come", "18": "Lift", "19": "Advise", "20": "Encourage", "21": "Welding", "22": "Trouble", "23": "Crying", "24": "Whisper", "25": "Shave", "26": "Qurbani", "27": "Embrace", "28": "Dig", "29": "Making Noise", "30": "Bath", "31": "Hate", "32": "Sleep", "33": "Rotate", "34": "want", "35": "think", "36": "kiss", "37": "steal", "38": "try", "39": "tear" }, "image_size": 224, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "Absent": 3, "Advise": 19, "Bath": 30, "Bring": 11, "Come": 17, "Complain": 8, "Crying": 23, "Dig": 28, "Dishonest": 6, "Embrace": 27, "Encourage": 20, "Hate": 31, "Hit": 9, "Insult": 7, "Invisible": 2, "Lift": 18, "Making Noise": 29, "Pass": 1, "Permission": 5, "Qurbani": 26, "Reject": 0, "Request": 12, "Rotate": 33, "Save": 10, "Seperate": 14, "Shave": 25, "Sleep": 32, "Start": 13, "Talk": 15, "Torture": 4, "Trouble": 22, "Turn off the light": 16, "Welding": 21, "Whisper": 24, "kiss": 36, "steal": 37, "tear": 39, "think": 35, "try": 38, "want": 34 }, "layer_norm_eps": 1e-12, "model_type": "videomae", "norm_pix_loss": true, "num_attention_heads": 12, "num_channels": 3, "num_frames": 16, "num_hidden_layers": 12, "patch_size": 16, "problem_type": "single_label_classification", "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.35.2", "tubelet_size": 2, "use_mean_pooling": false }