krishnateja95 committed on
Commit
519e379
·
verified ·
1 Parent(s): 507648e

config files

Browse files
Files changed (2) hide show
  1. config.json +178 -0
  2. generation_config.json +4 -0
config.json ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepseek-ai/deepseek-vl2-tiny",
3
+ "architectures": [
4
+ "DeepseekVLV2ForCausalLM"
5
+ ],
6
+ "candidate_resolutions": [
7
+ [
8
+ 384,
9
+ 384
10
+ ],
11
+ [
12
+ 384,
13
+ 768
14
+ ],
15
+ [
16
+ 768,
17
+ 384
18
+ ],
19
+ [
20
+ 384,
21
+ 1152
22
+ ],
23
+ [
24
+ 1152,
25
+ 384
26
+ ],
27
+ [
28
+ 384,
29
+ 1536
30
+ ],
31
+ [
32
+ 1536,
33
+ 384
34
+ ],
35
+ [
36
+ 768,
37
+ 768
38
+ ],
39
+ [
40
+ 384,
41
+ 1920
42
+ ],
43
+ [
44
+ 1920,
45
+ 384
46
+ ],
47
+ [
48
+ 384,
49
+ 2304
50
+ ],
51
+ [
52
+ 2304,
53
+ 384
54
+ ],
55
+ [
56
+ 768,
57
+ 1152
58
+ ],
59
+ [
60
+ 1152,
61
+ 768
62
+ ],
63
+ [
64
+ 384,
65
+ 2688
66
+ ],
67
+ [
68
+ 2688,
69
+ 384
70
+ ],
71
+ [
72
+ 384,
73
+ 3072
74
+ ],
75
+ [
76
+ 3072,
77
+ 384
78
+ ],
79
+ [
80
+ 768,
81
+ 1536
82
+ ],
83
+ [
84
+ 1536,
85
+ 768
86
+ ],
87
+ [
88
+ 384,
89
+ 3456
90
+ ],
91
+ [
92
+ 3456,
93
+ 384
94
+ ],
95
+ [
96
+ 1152,
97
+ 1152
98
+ ]
99
+ ],
100
+ "global_view_pos": "head",
101
+ "language_config": {
102
+ "architectures": [
103
+ "DeepseekV2ForCausalLM"
104
+ ],
105
+ "auto_map": {
106
+ "AutoConfig": "configuration_deepseek.DeepseekV2Config",
107
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
108
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
109
+ },
110
+ "bos_token_id": 0,
111
+ "eos_token_id": 1,
112
+ "first_k_dense_replace": 1,
113
+ "hidden_size": 1280,
114
+ "intermediate_size": 6848,
115
+ "kv_lora_rank": null,
116
+ "lm_head": true,
117
+ "max_position_embeddings": 4096,
118
+ "model_type": "deepseek_v2",
119
+ "moe_intermediate_size": 896,
120
+ "n_group": 1,
121
+ "n_routed_experts": 64,
122
+ "n_shared_experts": 2,
123
+ "num_attention_heads": 10,
124
+ "num_experts_per_tok": 6,
125
+ "num_hidden_layers": 12,
126
+ "num_key_value_heads": 10,
127
+ "q_lora_rank": null,
128
+ "qk_nope_head_dim": 0,
129
+ "qk_rope_head_dim": 0,
130
+ "rm_head": false,
131
+ "topk_group": 1,
132
+ "topk_method": "greedy",
133
+ "torch_dtype": "bfloat16",
134
+ "use_mla": false,
135
+ "v_head_dim": 0,
136
+ "vocab_size": 129280
137
+ },
138
+ "model_type": "deepseek_vl_v2",
139
+ "projector_config": {
140
+ "model_type": "mlp_projector",
141
+ "n_embed": 1280
142
+ },
143
+ "quantization_config": {
144
+ "amp": true,
145
+ "autoround_version": "0.4.6",
146
+ "backend": "auto_round:gptq:exllamav2",
147
+ "batch_size": 8,
148
+ "bits": 4,
149
+ "data_type": "int",
150
+ "dataset": "NeelNanda/pile-10k",
151
+ "enable_minmax_tuning": true,
152
+ "enable_norm_bias_tuning": false,
153
+ "enable_quanted_input": true,
154
+ "gradient_accumulate_steps": 1,
155
+ "group_size": 64,
156
+ "iters": 2,
157
+ "low_gpu_mem_usage": false,
158
+ "lr": 0.5,
159
+ "minmax_lr": 0.5,
160
+ "nsamples": 8,
161
+ "quant_method": "intel/auto-round",
162
+ "scale_dtype": "torch.float16",
163
+ "seqlen": 2048,
164
+ "sym": true,
165
+ "to_quant_block_names": "language.model.layers"
166
+ },
167
+ "tile_tag": "2D",
168
+ "torch_dtype": "float16",
169
+ "transformers_version": "4.40.0",
170
+ "vision_config": {
171
+ "layers": 27,
172
+ "mlp_ratio": 3.7362,
173
+ "model_name": "siglip_so400m_patch14_384",
174
+ "model_type": "vision",
175
+ "patch_size": 14,
176
+ "width": 1152
177
+ }
178
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.40.0"
4
+ }