metascroy committed
Commit a05b1b3 · verified · 1 Parent(s): c2b7796

Upload 3 files

Files changed (4):
  1. .gitattributes +1 -0
  2. config.json +151 -0
  3. model.pte +3 -0
  4. pytorch_model.bin +3 -0
.gitattributes CHANGED
@@ -38,3 +38,4 @@ qwen3-4b-1024-ctx.pte filter=lfs diff=lfs merge=lfs -text
  qwen3-4b-8da4w-1024-ctx.pte filter=lfs diff=lfs merge=lfs -text
  qwen3-4B-8da4w-1024-cxt.pte filter=lfs diff=lfs merge=lfs -text
  qwen3-4B-INT8-INT4-1024-cxt.pte filter=lfs diff=lfs merge=lfs -text
+ model.pte filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,151 @@
+ {
+   "architectures": [
+     "Qwen3ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "dtype": "bfloat16",
+   "eos_token_id": 151645,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 2560,
+   "initializer_range": 0.02,
+   "intermediate_size": 9728,
+   "layer_types": [
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention",
+     "full_attention"
+   ],
+   "max_position_embeddings": 40960,
+   "max_window_layers": 36,
+   "model_type": "qwen3",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 36,
+   "num_key_value_heads": 8,
+   "quantization_config": {
+     "include_input_output_embeddings": true,
+     "modules_to_not_convert": [],
+     "quant_method": "torchao",
+     "quant_type": {
+       "default": {
+         "_data": {
+           "module_fqn_to_config": {
+             "_default": {
+               "_data": {
+                 "act_mapping_type": {
+                   "_data": "ASYMMETRIC",
+                   "_type": "MappingType"
+                 },
+                 "intx_packing_format": {
+                   "_data": "UNPACKED_TO_INT8",
+                   "_type": "IntxPackingFormat"
+                 },
+                 "layout": {
+                   "_data": {},
+                   "_type": "QDQLayout",
+                   "_version": 1
+                 },
+                 "weight_dtype": {
+                   "_data": "int4",
+                   "_type": "torch.dtype"
+                 },
+                 "weight_granularity": {
+                   "_data": {
+                     "group_size": 32
+                   },
+                   "_type": "PerGroup",
+                   "_version": 1
+                 },
+                 "weight_mapping_type": {
+                   "_data": "SYMMETRIC",
+                   "_type": "MappingType"
+                 },
+                 "weight_scale_dtype": null
+               },
+               "_type": "Int8DynamicActivationIntxWeightConfig",
+               "_version": 2
+             },
+             "model.embed_tokens": {
+               "_data": {
+                 "granularity": {
+                   "_data": {
+                     "axis": 0
+                   },
+                   "_type": "PerAxis",
+                   "_version": 1
+                 },
+                 "intx_packing_format": {
+                   "_data": "UNPACKED_TO_INT8",
+                   "_type": "IntxPackingFormat"
+                 },
+                 "layout": {
+                   "_data": {},
+                   "_type": "QDQLayout",
+                   "_version": 1
+                 },
+                 "mapping_type": {
+                   "_data": "SYMMETRIC",
+                   "_type": "MappingType"
+                 },
+                 "scale_dtype": null,
+                 "weight_dtype": {
+                   "_data": "int8",
+                   "_type": "torch.dtype"
+                 }
+               },
+               "_type": "IntxWeightOnlyConfig",
+               "_version": 2
+             }
+           }
+         },
+         "_type": "ModuleFqnToConfig",
+         "_version": 1
+       }
+     },
+     "quant_type_kwargs": {},
+     "untie_embedding_weights": false
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "transformers_version": "4.57.0.dev0",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "vocab_size": 151936
+ }
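
Note: the quantization_config block above is a serialized torchao recipe (quant_method "torchao"). It is a ModuleFqnToConfig whose "_default" rule is Int8DynamicActivationIntxWeightConfig (dynamic asymmetric int8 activations, symmetric int4 weights in groups of 32, i.e. the "8da4w" scheme referenced in .gitattributes), with model.embed_tokens instead getting IntxWeightOnlyConfig (symmetric per-row int8). The sketch below reconstructs roughly the same config in Python. It is illustrative only, assuming a recent torchao build where these classes are importable from torchao.quantization; import paths and argument names can shift between versions.

# Sketch of the quantization recipe serialized in "quantization_config" above.
# Assumes a recent torchao where these classes live under torchao.quantization.
import torch
from torchao.quantization import (
    Int8DynamicActivationIntxWeightConfig,
    IntxWeightOnlyConfig,
    MappingType,
    ModuleFqnToConfig,
    PerAxis,
    PerGroup,
)

# Default rule for linear layers: dynamic asymmetric int8 activations,
# symmetric int4 weights quantized per group of 32 (the "8da4w" recipe).
linear_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
    weight_mapping_type=MappingType.SYMMETRIC,
    act_mapping_type=MappingType.ASYMMETRIC,
)

# Embedding table: weight-only symmetric int8, one scale per row (axis 0).
embedding_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int8,
    granularity=PerAxis(0),
    mapping_type=MappingType.SYMMETRIC,
)

# Map module FQNs to configs; "_default" covers every module not listed explicitly.
# quantize_(model, quant_config) would apply this recipe to an eager model.
quant_config = ModuleFqnToConfig(
    module_fqn_to_config={
        "_default": linear_config,
        "model.embed_tokens": embedding_config,
    }
)

Because quant_method is "torchao", transformers (4.57.0.dev0 per this config) can re-apply the recipe when the checkpoint is loaded; model.pte is presumably the ahead-of-time ExecuTorch export of the same quantized model.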
model.pte ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdb8d47ce2fd8a0713e134d8f810475beabe9b59b4f8527f6a7069146f13ac67
+ size 2656433792
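
Note: model.pte is committed as a Git LFS pointer (the payload is about 2.7 GB). The .pte extension is ExecuTorch's serialized program format. Below is a minimal loading/inspection sketch, assuming the executorch Python package is installed and exposes the executorch.runtime bindings; the expected inputs (token ids, cache positions, etc.) depend on how the model was exported and are not recoverable from this diff, so execution itself is left as a comment.

# Minimal sketch, assuming the `executorch` package provides executorch.runtime;
# API details may vary by version.
from executorch.runtime import Runtime

runtime = Runtime.get()                      # process-wide ExecuTorch runtime
program = runtime.load_program("model.pte")  # parse the serialized program
print(program.method_names)                  # typically includes "forward"

method = program.load_method("forward")
# method.execute([...]) runs one inference step once the exported inputs
# (e.g. token ids and KV-cache positions) are prepared as tensors.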
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2af09e30fe6009b1daa82aa9694b346f65c51d5609ebea72c68dba1a4864b274
+ size 4789478103
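
Note: both binaries are stored as Git LFS pointers, a short text stub recording the spec version, the SHA-256 of the payload (oid), and its size in bytes. The standard-library sketch below checks a downloaded file against such a pointer; the payload path is a placeholder for illustration, since after an LFS checkout the pointer file is replaced by the payload itself.

# Sketch: verify a downloaded artifact against its Git LFS pointer
# (oid = SHA-256 of the payload, size = payload size in bytes).
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer (version / oid / size)."""
    fields = {}
    for line in pointer_text.splitlines():
        key, _, value = line.partition(" ")
        if key:
            fields[key] = value
    return fields

def verify(payload_path: str, pointer_text: str) -> bool:
    """Return True if the payload's SHA-256 and size match the pointer."""
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].partition(":")[2]  # strip the "sha256:" prefix
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    path = Path(payload_path)
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and path.stat().st_size == expected_size

# Example with the pointer contents shown above (payload path is a placeholder):
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:2af09e30fe6009b1daa82aa9694b346f65c51d5609ebea72c68dba1a4864b274\n"
    "size 4789478103\n"
)
# verify("pytorch_model.bin", pointer)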