nofunstudio committed on
Commit
021b71e
·
verified ·
1 Parent(s): d6c5b51
Files changed (1) hide show
  1. config.json +169 -0
config.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": ["Gemma3nForConditionalGeneration"],
3
+ "audio_config": {
4
+ "conf_attention_chunk_size": 12,
5
+ "conf_attention_context_left": 13,
6
+ "conf_attention_context_right": 0,
7
+ "conf_attention_logit_cap": 50.0,
8
+ "conf_conv_kernel_size": 5,
9
+ "conf_num_attention_heads": 8,
10
+ "conf_num_hidden_layers": 12,
11
+ "conf_reduction_factor": 4,
12
+ "conf_residual_weight": 0.5,
13
+ "gradient_clipping": 10000000000.0,
14
+ "hidden_size": 1536,
15
+ "input_feat_size": 128,
16
+ "model_type": "gemma3n_audio",
17
+ "rms_norm_eps": 1e-6,
18
+ "sscp_conv_channel_size": [128, 32],
19
+ "sscp_conv_group_norm_eps": 0.001,
20
+ "sscp_conv_kernel_size": [
21
+ [3, 3],
22
+ [3, 3]
23
+ ],
24
+ "sscp_conv_stride_size": [
25
+ [2, 2],
26
+ [2, 2]
27
+ ],
28
+ "torch_dtype": "bfloat16",
29
+ "vocab_offset": 262272,
30
+ "vocab_size": 128
31
+ },
32
+ "audio_soft_tokens_per_image": 188,
33
+ "audio_token_id": 262273,
34
+ "boa_token_id": 256000,
35
+ "boi_token_id": 255999,
36
+ "bos_token_id": 2,
37
+ "eoa_token_id": 262272,
38
+ "eoi_token_id": 262144,
39
+ "eos_token_id": 106,
40
+ "image_token_id": 262145,
41
+ "initializer_range": 0.02,
42
+ "model_type": "gemma3n",
43
+ "hidden_size": 2048,
44
+ "num_attention_heads": 8,
45
+ "pad_token_id": 0,
46
+ "quantization_config": {
47
+ "_load_in_4bit": true,
48
+ "_load_in_8bit": false,
49
+ "bnb_4bit_compute_dtype": "bfloat16",
50
+ "bnb_4bit_quant_storage": "uint8",
51
+ "bnb_4bit_quant_type": "nf4",
52
+ "bnb_4bit_use_double_quant": true,
53
+ "llm_int8_enable_fp32_cpu_offload": false,
54
+ "llm_int8_has_fp16_weight": false,
55
+ "llm_int8_skip_modules": [
56
+ "embed_tokens",
57
+ "embedding",
58
+ "lm_head",
59
+ "multi_modal_projector",
60
+ "merger",
61
+ "modality_projection",
62
+ "correction_coefs",
63
+ "prediction_coefs",
64
+ "modality_router",
65
+ "embedding_projection",
66
+ "input_proj_linear",
67
+ "per_layer_projection",
68
+ "audio_tower",
69
+ "embed_vision",
70
+ "embed_audio",
71
+ "altup_projections",
72
+ "altup_unembed_projections"
73
+ ],
74
+ "llm_int8_threshold": 6.0,
75
+ "load_in_4bit": true,
76
+ "load_in_8bit": false,
77
+ "quant_method": "bitsandbytes"
78
+ },
79
+ "text_config": {
80
+ "activation_sparsity_pattern": [
81
+ 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.0, 0.0, 0.0,
82
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
83
+ 0.0, 0.0
84
+ ],
85
+ "altup_active_idx": 0,
86
+ "altup_coef_clip": 120.0,
87
+ "altup_correct_scale": true,
88
+ "altup_num_inputs": 4,
89
+ "attention_bias": false,
90
+ "attention_dropout": 0.0,
91
+ "final_logit_softcapping": 30.0,
92
+ "head_dim": 256,
93
+ "hidden_activation": "gelu_pytorch_tanh",
94
+ "hidden_size": 2048,
95
+ "hidden_size_per_layer_input": 256,
96
+ "initializer_range": 0.02,
97
+ "intermediate_size": [
98
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
99
+ 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
100
+ 8192, 8192, 8192, 8192, 8192, 8192
101
+ ],
102
+ "laurel_rank": 64,
103
+ "layer_types": [
104
+ "sliding_attention",
105
+ "sliding_attention",
106
+ "sliding_attention",
107
+ "sliding_attention",
108
+ "full_attention",
109
+ "sliding_attention",
110
+ "sliding_attention",
111
+ "sliding_attention",
112
+ "sliding_attention",
113
+ "full_attention",
114
+ "sliding_attention",
115
+ "sliding_attention",
116
+ "sliding_attention",
117
+ "sliding_attention",
118
+ "full_attention",
119
+ "sliding_attention",
120
+ "sliding_attention",
121
+ "sliding_attention",
122
+ "sliding_attention",
123
+ "full_attention",
124
+ "sliding_attention",
125
+ "sliding_attention",
126
+ "sliding_attention",
127
+ "sliding_attention",
128
+ "full_attention",
129
+ "sliding_attention",
130
+ "sliding_attention",
131
+ "sliding_attention",
132
+ "sliding_attention",
133
+ "full_attention"
134
+ ],
135
+ "max_position_embeddings": 32768,
136
+ "model_type": "gemma3n_text",
137
+ "num_attention_heads": 8,
138
+ "num_hidden_layers": 30,
139
+ "num_key_value_heads": 2,
140
+ "num_kv_shared_layers": 10,
141
+ "rms_norm_eps": 1e-6,
142
+ "rope_local_base_freq": 10000.0,
143
+ "rope_scaling": null,
144
+ "rope_theta": 1000000.0,
145
+ "sliding_window": 512,
146
+ "torch_dtype": "bfloat16",
147
+ "use_cache": true,
148
+ "vocab_size": 262400,
149
+ "vocab_size_per_layer_input": 262144
150
+ },
151
+ "torch_dtype": "bfloat16",
152
+ "transformers_version": "4.53.1",
153
+ "unsloth_fixed": true,
154
+ "vision_config": {
155
+ "architecture": "mobilenetv5_300m_enc",
156
+ "do_pooling": false,
157
+ "hidden_size": 2048,
158
+ "initializer_range": 0.02,
159
+ "label_names": ["LABEL_0", "LABEL_1"],
160
+ "model_args": null,
161
+ "model_type": "gemma3n_vision",
162
+ "num_classes": 2,
163
+ "rms_norm_eps": 1e-6,
164
+ "torch_dtype": "bfloat16",
165
+ "vocab_offset": 262144,
166
+ "vocab_size": 128
167
+ },
168
+ "vision_soft_tokens_per_image": 256
169
+ }