arxyzan committed on
Commit
17470f8
1 Parent(s): dd13632

Hezar: Upload model_config.yaml

Browse files
Files changed (1) hide show
  1. model_config.yaml +53 -0
model_config.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: vit_roberta_image2text
2
+ config_type: model
3
+ encoder:
4
+ name: null
5
+ config_type: model
6
+ hidden_size: 768
7
+ num_hidden_layers: 12
8
+ num_attention_heads: 12
9
+ intermediate_size: 3072
10
+ hidden_act: gelu
11
+ hidden_dropout_prob: 0.0
12
+ attention_probs_dropout_prob: 0.0
13
+ initializer_range: 0.02
14
+ layer_norm_eps: 1.0e-12
15
+ image_size: 224
16
+ patch_size: 16
17
+ num_channels: 3
18
+ qkv_bias: true
19
+ encoder_stride: 16
20
+ decoder:
21
+ name: null
22
+ config_type: model
23
+ is_decoder: true
24
+ add_cross_attention: true
25
+ attention_probs_dropout_prob: 0.1
26
+ bos_token_id: 0
27
+ eos_token_id: 2
28
+ classifier_dropout: null
29
+ gradient_checkpointing: false
30
+ hidden_act: gelu
31
+ hidden_dropout_prob: 0.1
32
+ hidden_size: 768
33
+ initializer_range: 0.02
34
+ intermediate_size: 3072
35
+ layer_norm_eps: 1.0e-12
36
+ max_position_embeddings: 514
37
+ num_attention_heads: 12
38
+ num_hidden_layers: 12
39
+ pad_token_id: 1
40
+ position_embedding_type: absolute
41
+ type_vocab_size: 1
42
+ use_cache: true
43
+ vocab_size: 42000
44
+ generation:
45
+ bos_token_id: 0
46
+ decoder_start_token_id: 0
47
+ early_stopping: true
48
+ eos_token_id: 2
49
+ length_penalty: 2.0
50
+ max_length: 64
51
+ no_repeat_ngram_size: 3
52
+ num_beams: 4
53
+ pad_token_id: 1