rsxdalv commited on
Commit
f8f08f0
·
verified ·
1 Parent(s): 958cbb6

Upload 36 files

Browse files
big-bf16/coarse.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1c1257051eaeacda3acfd833ecd5fc55c65c6e4ef15c13c75a4bae16b9fa1b
3
+ size 655738248
big-bf16/coarse_model_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "input_vocab_size": 12096,
10
+ "output_vocab_size": 12096
11
+ },
12
+ "model_type": "coarse",
13
+ "parameter_count": 327861248,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 2.9014446608225506,
16
+ "torch_dtype": "bfloat16"
17
+ }
big-bf16/fine.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3475ebae4c583f2188d5c92bd04f7135e1cdbf2d1f0a272009cc47fe4896611
3
+ size 623600536
big-bf16/fine_model_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "n_codes_total": 8,
10
+ "n_codes_given": 1,
11
+ "input_vocab_size": 1056,
12
+ "output_vocab_size": 1056
13
+ },
14
+ "model_type": "fine",
15
+ "parameter_count": 319358976,
16
+ "needs_tokenizer": true,
17
+ "best_val_loss": 2.0785889791647594,
18
+ "torch_dtype": "bfloat16"
19
+ }
big-bf16/text.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda74920a99d0f508fd931ce95a8842858f45e79a2e3e9baa24e3d7f3c12f405
3
+ size 892192136
big-bf16/text_model_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "input_vocab_size": 129600,
9
+ "output_vocab_size": 10048,
10
+ "dropout": 0.0
11
+ },
12
+ "model_type": "text",
13
+ "parameter_count": 446088192,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 1.2688672196865083,
16
+ "torch_dtype": "bfloat16"
17
+ }
big-pth/coarse.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7abe483e16dfa491adafd170b3aa2569006d2326065f31e81535b0d92915915
3
+ size 1311503218
big-pth/coarse_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "input_vocab_size": 12096,
10
+ "output_vocab_size": 12096
11
+ },
12
+ "model_type": "coarse",
13
+ "parameter_count": 327861248,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 2.9014446608225506
16
+ }
big-pth/fine.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:430efa5b7bdb934ad5e2ab64f28576c89bf757b2de193a743a4aaa9d78618bf9
3
+ size 1247225967
big-pth/fine_model_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "n_codes_total": 8,
10
+ "n_codes_given": 1,
11
+ "input_vocab_size": 1056,
12
+ "output_vocab_size": 1056
13
+ },
14
+ "model_type": "fine",
15
+ "parameter_count": 319358976,
16
+ "needs_tokenizer": true,
17
+ "best_val_loss": 2.0785889791647594
18
+ }
big-pth/text.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204faeacaa40c977823c7826695e4729f09c5dea640d202b651e02ae2872deaa
3
+ size 1784407618
big-pth/text_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "input_vocab_size": 129600,
9
+ "output_vocab_size": 10048,
10
+ "dropout": 0.0
11
+ },
12
+ "model_type": "text",
13
+ "parameter_count": 446088192,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 1.2688672196865083
16
+ }
big/coarse.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea4eb15f6b9baed2edd4684a055df96d629445c2f84dcb222b1789714dae006
3
+ size 1311460680
big/coarse_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "input_vocab_size": 12096,
10
+ "output_vocab_size": 12096
11
+ },
12
+ "model_type": "coarse",
13
+ "parameter_count": 327861248,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 2.9014446608225506
16
+ }
big/fine.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3059d910be9eca177c5d8be60b7ffe338b18b5df3c9d2dcd81f7c496cb5ff1
3
+ size 1247179592
big/fine_model_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "n_codes_total": 8,
10
+ "n_codes_given": 1,
11
+ "input_vocab_size": 1056,
12
+ "output_vocab_size": 1056
13
+ },
14
+ "model_type": "fine",
15
+ "torch_dtype": "float32",
16
+ "parameter_count": 319358976,
17
+ "needs_tokenizer": true,
18
+ "best_val_loss": 2.0785889791647594
19
+ }
big/text.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e30646e1ce1aa30cb7271399599df35d73c588b8ff1e65fabd197c942fe938
3
+ size 1784368456
big/text_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 24,
4
+ "n_head": 16,
5
+ "n_embd": 1024,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "input_vocab_size": 129600,
9
+ "output_vocab_size": 10048,
10
+ "dropout": 0.0
11
+ },
12
+ "model_type": "text",
13
+ "parameter_count": 446088192,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 1.2688672196865083
16
+ }
small-bf16/coarse.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf2c48600e87b58606c6e6d23291b3167f5fbacf30b0df6c200b460d448dcca
3
+ size 208647384
small-bf16/coarse_model_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "input_vocab_size": 12096,
10
+ "output_vocab_size": 12096
11
+ },
12
+ "model_type": "coarse",
13
+ "parameter_count": 104319744,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 2.9895273513793947,
16
+ "torch_dtype": "bfloat16"
17
+ }
small-bf16/fine.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c582d992123bc3cb7bd4cad0c100702d39ba98b419cc8986622f24b41642f28
3
+ size 184506264
small-bf16/fine_model_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "n_codes_total": 8,
10
+ "n_codes_given": 1,
11
+ "input_vocab_size": 1056,
12
+ "output_vocab_size": 1056
13
+ },
14
+ "model_type": "fine",
15
+ "parameter_count": 97924608,
16
+ "needs_tokenizer": true,
17
+ "best_val_loss": 2.5150986952781675,
18
+ "torch_dtype": "bfloat16"
19
+ }
small-bf16/text.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eccd0aff1a5ed463e4af7f546161db9382a9cbab839612671c1819559a7b1fc
3
+ size 385987800
small-bf16/text_model_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "input_vocab_size": 129600,
9
+ "output_vocab_size": 10048,
10
+ "dropout": 0.0
11
+ },
12
+ "model_type": "text",
13
+ "parameter_count": 192989952,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 1.2919732055664062,
16
+ "torch_dtype": "bfloat16"
17
+ }
small-pth/coarse.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce245eb9a72303373444e5c29a9c5b2e66ed8ddd2090040543cbf794dab06b12
3
+ size 417307170
small-pth/coarse_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "input_vocab_size": 12096,
10
+ "output_vocab_size": 12096
11
+ },
12
+ "model_type": "coarse",
13
+ "parameter_count": 104319744,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 2.9895273513793947
16
+ }
small-pth/fine.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466f5d38dcb13370b57b4d04cd767de4ecf459dff18ad15c1c01d54472285a88
3
+ size 369026191
small-pth/fine_model_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "n_codes_total": 8,
10
+ "n_codes_given": 1,
11
+ "input_vocab_size": 1056,
12
+ "output_vocab_size": 1056
13
+ },
14
+ "model_type": "fine",
15
+ "parameter_count": 97924608,
16
+ "needs_tokenizer": true,
17
+ "best_val_loss": 2.5150986952781675
18
+ }
small-pth/text.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba6029a68df2997c3847690df72ef4211e00a2bdbf5043c6c6fac6c928ec51b
3
+ size 771987842
small-pth/text_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "input_vocab_size": 129600,
9
+ "output_vocab_size": 10048,
10
+ "dropout": 0.0
11
+ },
12
+ "model_type": "text",
13
+ "parameter_count": 192989952,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 1.2919732055664062
16
+ }
small/coarse.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5dd4b6e05ab7596b932b69a654765ab756b924cbe92bdad773f2508aa69cfe3
3
+ size 417286848
small/coarse_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "input_vocab_size": 12096,
10
+ "output_vocab_size": 12096
11
+ },
12
+ "model_type": "coarse",
13
+ "parameter_count": 104319744,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 2.9895273513793947
16
+ }
small/fine.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d18a2c9dacdf49fd8196a927a068286ae4f3ff7278380c5ac774eae42c2ef8dc
3
+ size 369001320
small/fine_model_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "dropout": 0.0,
9
+ "n_codes_total": 8,
10
+ "n_codes_given": 1,
11
+ "input_vocab_size": 1056,
12
+ "output_vocab_size": 1056
13
+ },
14
+ "model_type": "fine",
15
+ "parameter_count": 97924608,
16
+ "needs_tokenizer": true,
17
+ "best_val_loss": 2.5150986952781675
18
+ }
small/text.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35e759a15528351dd8d1ae8da5e0b4c6f2937e46145223fde5dfd2329b49cb05
3
+ size 771967672
small/text_model_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "n_layer": 12,
4
+ "n_head": 12,
5
+ "n_embd": 768,
6
+ "block_size": 1024,
7
+ "bias": false,
8
+ "input_vocab_size": 129600,
9
+ "output_vocab_size": 10048,
10
+ "dropout": 0.0
11
+ },
12
+ "model_type": "text",
13
+ "parameter_count": 192989952,
14
+ "needs_tokenizer": true,
15
+ "best_val_loss": 1.2919732055664062
16
+ }