Commit de2ebf8 (verified) · Narsil (HF Staff) · 1 parent: 25a4004

Upload folder using huggingface_hub

models/hnet_1stage_L.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "arch_layout": ["m4", ["T22"], "m4"],
+   "d_model": [1024, 1536],
+   "d_intermediate": [0, 4096],
+   "vocab_size": 256,
+   "ssm_cfg": {
+     "chunk_size": 256,
+     "d_conv": 4,
+     "d_state": 128,
+     "expand": 2
+   },
+   "attn_cfg": {
+     "num_heads": [16, 16],
+     "rotary_emb_dim": [32, 48],
+     "window_size": [1023, -1]
+   },
+   "tie_embeddings": false
+ }
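
These configs describe the per-stage encoder/main/decoder stack via arch_layout. A minimal parsing sketch, assuming (this commit does not state it) that "mN" denotes N Mamba-2 layers, "TN" denotes N Transformer layers, and a nested list marks an inner stage; count_layers is a hypothetical helper, not part of the repo:

import json
import re

def count_layers(layout):
    # Recursively count layers in an arch_layout such as
    # ["m4", ["T22"], "m4"]. Assumption: "mN" = N Mamba-2 layers,
    # "TN" = N Transformer layers; nested lists are inner stages.
    total = 0
    for item in layout:
        if isinstance(item, list):
            total += count_layers(item)
        else:
            # A string may concatenate block types, e.g. "T1m4".
            for _kind, n in re.findall(r"([mT])(\d+)", item):
                total += int(n)
    return total

with open("models/hnet_1stage_L.json") as f:
    cfg = json.load(f)

print(count_layers(cfg["arch_layout"]))  # 4 + 22 + 4 = 30 for hnet_1stage_L
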
models/hnet_1stage_L.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:313d9b8fc9adee06e15486862c9a78ddea73945ea03ff3ac3f6f79ebe205484f
+ size 2717648216
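
The .safetensors entries in this commit are Git LFS pointer files, not the weights themselves; each pointer records the blob's sha256 digest and byte size. A minimal sketch for checking a downloaded blob against its pointer (the local file paths are hypothetical):

import hashlib

def verify_lfs_pointer(pointer_path, blob_path):
    # Parse the pointer's "oid sha256:..." and "size ..." lines and
    # compare them with the blob's actual digest and byte count.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])

    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# Hypothetical local paths; adjust to wherever the files were fetched.
ok = verify_lfs_pointer("hnet_1stage_L.pointer", "models/hnet_1stage_L.safetensors")
print("match" if ok else "MISMATCH")
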
models/hnet_1stage_XL.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "arch_layout": ["m4", ["T24"], "m4"],
+   "d_model": [1024, 2048],
+   "d_intermediate": [0, 5504],
+   "vocab_size": 256,
+   "ssm_cfg": {
+     "chunk_size": 256,
+     "d_conv": 4,
+     "d_state": 128,
+     "expand": 2
+   },
+   "attn_cfg": {
+     "num_heads": [16, 16],
+     "rotary_emb_dim": [32, 64],
+     "window_size": [1023, -1]
+   },
+   "tie_embeddings": false
+ }
models/hnet_2stage_L.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "arch_layout": ["m4", ["T1m4", ["T26"], "m4T1"], "m4"],
+   "d_model": [1024, 1024, 1536],
+   "d_intermediate": [0, 2816, 4096],
+   "vocab_size": 256,
+   "ssm_cfg": {
+     "chunk_size": 256,
+     "d_conv": 4,
+     "d_state": 128,
+     "expand": 2
+   },
+   "attn_cfg": {
+     "num_heads": [16, 16, 16],
+     "rotary_emb_dim": [32, 32, 48],
+     "window_size": [1023, 1023, -1]
+   },
+   "tie_embeddings": false
+ }
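
The two-stage layouts nest a second list and concatenate block types within one string (e.g. "T1m4"). Reusing the hypothetical count_layers sketch from above:

with open("models/hnet_2stage_L.json") as f:
    cfg2 = json.load(f)

# ["m4", ["T1m4", ["T26"], "m4T1"], "m4"]
# -> 4 + (1+4) + 26 + (4+1) + 4 = 44 layers in total
print(count_layers(cfg2["arch_layout"]))
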
models/hnet_2stage_L.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f4a5fa7cad65fcd853b6d185285c0f688a22b8a4c7e97137584415f972c735dc
+ size 3497304728
models/hnet_2stage_XL.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "arch_layout": ["m4", ["T1m4", ["T27"], "m4T1"], "m4"],
+   "d_model": [1024, 1536, 2048],
+   "d_intermediate": [0, 4096, 5504],
+   "vocab_size": 256,
+   "ssm_cfg": {
+     "chunk_size": 256,
+     "d_conv": 4,
+     "d_state": 128,
+     "expand": 2
+   },
+   "attn_cfg": {
+     "num_heads": [16, 16, 16],
+     "rotary_emb_dim": [32, 48, 64],
+     "window_size": [1023, 1023, -1]
+   },
+   "tie_embeddings": false
+ }
models/hnet_2stage_XL_chinese.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "arch_layout": ["m4", ["T1m4", ["T30"], "m4T1"], "m4"],
+   "d_model": [1024, 1536, 2048],
+   "d_intermediate": [0, 4096, 5504],
+   "vocab_size": 256,
+   "ssm_cfg": {
+     "chunk_size": 256,
+     "d_conv": 4,
+     "d_state": 128,
+     "expand": 2
+   },
+   "attn_cfg": {
+     "num_heads": [16, 16, 16],
+     "rotary_emb_dim": [32, 48, 64],
+     "window_size": [1023, 1023, -1]
+   },
+   "tie_embeddings": false
+ }
models/hnet_2stage_XL_code.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "arch_layout": ["m4", ["T1m4", ["T28"], "m4T1"], "m4"],
+   "d_model": [1024, 1536, 2048],
+   "d_intermediate": [0, 4096, 5504],
+   "vocab_size": 256,
+   "ssm_cfg": {
+     "chunk_size": 256,
+     "d_conv": 4,
+     "d_state": 128,
+     "expand": 2
+   },
+   "attn_cfg": {
+     "num_heads": [16, 16, 16],
+     "rotary_emb_dim": [32, 48, 64],
+     "window_size": [1023, 1023, -1]
+   },
+   "tie_embeddings": false
+ }