RichardErkhov committed
Commit 70b8e99
1 Parent(s): 3507641

uploaded model

Files changed (1): config.json +58 -0
config.json ADDED
@@ -0,0 +1,58 @@
+{
+  "_block_types": [
+    "recurrent",
+    "recurrent",
+    "attention"
+  ],
+  "_name_or_path": "recurrentgemma-2b-it",
+  "architectures": [
+    "RecurrentGemmaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_window_size": 2048,
+  "block_types": [
+    "recurrent",
+    "recurrent",
+    "attention"
+  ],
+  "bos_token_id": 2,
+  "conv1d_width": 4,
+  "embeddings_scale_by_sqrt_dim": true,
+  "eos_token_id": 1,
+  "final_w_init_variance_scale": 0.07692307692307693,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 2560,
+  "intermediate_size": 15360,
+  "logits_soft_cap": 30.0,
+  "lru_width": 2560,
+  "model_type": "recurrent_gemma",
+  "num_attention_heads": 10,
+  "num_hidden_layers": 26,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "partial_rotary_factor": 0.5,
+  "quantization_config": {
+    "_load_in_4bit": false,
+    "_load_in_8bit": true,
+    "bnb_4bit_compute_dtype": "float32",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": false,
+    "load_in_8bit": true,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 10000.0,
+  "torch_dtype": "float16",
+  "transformers_version": "4.40.2",
+  "use_cache": true,
+  "vocab_size": 256000,
+  "w_init_variance_scale": 0.01
+}
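
Because this config embeds a bitsandbytes quantization_config with "load_in_8bit": true, transformers re-applies the 8-bit quantization automatically at load time; no explicit BitsAndBytesConfig is needed. Below is a minimal loading sketch, assuming transformers >= 4.40.2 with bitsandbytes installed and a CUDA device available; the repo id is a placeholder assumption, not taken from this diff:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder repo id (assumption): substitute the actual Hub path of this upload.
repo_id = "RichardErkhov/recurrentgemma-2b-it-8bits"

tokenizer = AutoTokenizer.from_pretrained(repo_id)

# The stored quantization_config ("quant_method": "bitsandbytes",
# "load_in_8bit": true) is picked up automatically by from_pretrained.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map="auto",  # bitsandbytes 8-bit weights must be placed on GPU
)

prompt = "Explain recurrent blocks in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

A note on the architecture fields: "block_types" lists the repeating layer pattern (two recurrent blocks followed by one local-attention block with window size 2048), which transformers tiles cyclically across the 26 hidden layers of RecurrentGemma.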