Upload 36 files
Browse files- big-bf16/coarse.safetensors +3 -0
- big-bf16/coarse_model_config.json +17 -0
- big-bf16/fine.safetensors +3 -0
- big-bf16/fine_model_config.json +19 -0
- big-bf16/text.safetensors +3 -0
- big-bf16/text_model_config.json +17 -0
- big-pth/coarse.pth +3 -0
- big-pth/coarse_model_config.json +16 -0
- big-pth/fine.pth +3 -0
- big-pth/fine_model_config.json +18 -0
- big-pth/text.pth +3 -0
- big-pth/text_model_config.json +16 -0
- big/coarse.safetensors +3 -0
- big/coarse_model_config.json +16 -0
- big/fine.safetensors +3 -0
- big/fine_model_config.json +19 -0
- big/text.safetensors +3 -0
- big/text_model_config.json +16 -0
- small-bf16/coarse.safetensors +3 -0
- small-bf16/coarse_model_config.json +17 -0
- small-bf16/fine.safetensors +3 -0
- small-bf16/fine_model_config.json +19 -0
- small-bf16/text.safetensors +3 -0
- small-bf16/text_model_config.json +17 -0
- small-pth/coarse.pth +3 -0
- small-pth/coarse_model_config.json +16 -0
- small-pth/fine.pth +3 -0
- small-pth/fine_model_config.json +18 -0
- small-pth/text.pth +3 -0
- small-pth/text_model_config.json +16 -0
- small/coarse.safetensors +3 -0
- small/coarse_model_config.json +16 -0
- small/fine.safetensors +3 -0
- small/fine_model_config.json +18 -0
- small/text.safetensors +3 -0
- small/text_model_config.json +16 -0
big-bf16/coarse.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a1c1257051eaeacda3acfd833ecd5fc55c65c6e4ef15c13c75a4bae16b9fa1b
|
3 |
+
size 655738248
|
big-bf16/coarse_model_config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"input_vocab_size": 12096,
|
10 |
+
"output_vocab_size": 12096
|
11 |
+
},
|
12 |
+
"model_type": "coarse",
|
13 |
+
"parameter_count": 327861248,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 2.9014446608225506,
|
16 |
+
"torch_dtype": "bfloat16"
|
17 |
+
}
|
big-bf16/fine.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3475ebae4c583f2188d5c92bd04f7135e1cdbf2d1f0a272009cc47fe4896611
|
3 |
+
size 623600536
|
big-bf16/fine_model_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"n_codes_total": 8,
|
10 |
+
"n_codes_given": 1,
|
11 |
+
"input_vocab_size": 1056,
|
12 |
+
"output_vocab_size": 1056
|
13 |
+
},
|
14 |
+
"model_type": "fine",
|
15 |
+
"parameter_count": 319358976,
|
16 |
+
"needs_tokenizer": true,
|
17 |
+
"best_val_loss": 2.0785889791647594,
|
18 |
+
"torch_dtype": "bfloat16"
|
19 |
+
}
|
big-bf16/text.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fda74920a99d0f508fd931ce95a8842858f45e79a2e3e9baa24e3d7f3c12f405
|
3 |
+
size 892192136
|
big-bf16/text_model_config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"input_vocab_size": 129600,
|
9 |
+
"output_vocab_size": 10048,
|
10 |
+
"dropout": 0.0
|
11 |
+
},
|
12 |
+
"model_type": "text",
|
13 |
+
"parameter_count": 446088192,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 1.2688672196865083,
|
16 |
+
"torch_dtype": "bfloat16"
|
17 |
+
}
|
big-pth/coarse.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7abe483e16dfa491adafd170b3aa2569006d2326065f31e81535b0d92915915
|
3 |
+
size 1311503218
|
big-pth/coarse_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"input_vocab_size": 12096,
|
10 |
+
"output_vocab_size": 12096
|
11 |
+
},
|
12 |
+
"model_type": "coarse",
|
13 |
+
"parameter_count": 327861248,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 2.9014446608225506
|
16 |
+
}
|
big-pth/fine.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:430efa5b7bdb934ad5e2ab64f28576c89bf757b2de193a743a4aaa9d78618bf9
|
3 |
+
size 1247225967
|
big-pth/fine_model_config.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"n_codes_total": 8,
|
10 |
+
"n_codes_given": 1,
|
11 |
+
"input_vocab_size": 1056,
|
12 |
+
"output_vocab_size": 1056
|
13 |
+
},
|
14 |
+
"model_type": "fine",
|
15 |
+
"parameter_count": 319358976,
|
16 |
+
"needs_tokenizer": true,
|
17 |
+
"best_val_loss": 2.0785889791647594
|
18 |
+
}
|
big-pth/text.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:204faeacaa40c977823c7826695e4729f09c5dea640d202b651e02ae2872deaa
|
3 |
+
size 1784407618
|
big-pth/text_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"input_vocab_size": 129600,
|
9 |
+
"output_vocab_size": 10048,
|
10 |
+
"dropout": 0.0
|
11 |
+
},
|
12 |
+
"model_type": "text",
|
13 |
+
"parameter_count": 446088192,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 1.2688672196865083
|
16 |
+
}
|
big/coarse.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ea4eb15f6b9baed2edd4684a055df96d629445c2f84dcb222b1789714dae006
|
3 |
+
size 1311460680
|
big/coarse_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"input_vocab_size": 12096,
|
10 |
+
"output_vocab_size": 12096
|
11 |
+
},
|
12 |
+
"model_type": "coarse",
|
13 |
+
"parameter_count": 327861248,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 2.9014446608225506
|
16 |
+
}
|
big/fine.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac3059d910be9eca177c5d8be60b7ffe338b18b5df3c9d2dcd81f7c496cb5ff1
|
3 |
+
size 1247179592
|
big/fine_model_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"n_codes_total": 8,
|
10 |
+
"n_codes_given": 1,
|
11 |
+
"input_vocab_size": 1056,
|
12 |
+
"output_vocab_size": 1056
|
13 |
+
},
|
14 |
+
"model_type": "fine",
|
15 |
+
"torch_dtype": "float32",
|
16 |
+
"parameter_count": 319358976,
|
17 |
+
"needs_tokenizer": true,
|
18 |
+
"best_val_loss": 2.0785889791647594
|
19 |
+
}
|
big/text.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71e30646e1ce1aa30cb7271399599df35d73c588b8ff1e65fabd197c942fe938
|
3 |
+
size 1784368456
|
big/text_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 24,
|
4 |
+
"n_head": 16,
|
5 |
+
"n_embd": 1024,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"input_vocab_size": 129600,
|
9 |
+
"output_vocab_size": 10048,
|
10 |
+
"dropout": 0.0
|
11 |
+
},
|
12 |
+
"model_type": "text",
|
13 |
+
"parameter_count": 446088192,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 1.2688672196865083
|
16 |
+
}
|
small-bf16/coarse.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbf2c48600e87b58606c6e6d23291b3167f5fbacf30b0df6c200b460d448dcca
|
3 |
+
size 208647384
|
small-bf16/coarse_model_config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"input_vocab_size": 12096,
|
10 |
+
"output_vocab_size": 12096
|
11 |
+
},
|
12 |
+
"model_type": "coarse",
|
13 |
+
"parameter_count": 104319744,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 2.9895273513793947,
|
16 |
+
"torch_dtype": "bfloat16"
|
17 |
+
}
|
small-bf16/fine.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c582d992123bc3cb7bd4cad0c100702d39ba98b419cc8986622f24b41642f28
|
3 |
+
size 184506264
|
small-bf16/fine_model_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"n_codes_total": 8,
|
10 |
+
"n_codes_given": 1,
|
11 |
+
"input_vocab_size": 1056,
|
12 |
+
"output_vocab_size": 1056
|
13 |
+
},
|
14 |
+
"model_type": "fine",
|
15 |
+
"parameter_count": 97924608,
|
16 |
+
"needs_tokenizer": true,
|
17 |
+
"best_val_loss": 2.5150986952781675,
|
18 |
+
"torch_dtype": "bfloat16"
|
19 |
+
}
|
small-bf16/text.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7eccd0aff1a5ed463e4af7f546161db9382a9cbab839612671c1819559a7b1fc
|
3 |
+
size 385987800
|
small-bf16/text_model_config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"input_vocab_size": 129600,
|
9 |
+
"output_vocab_size": 10048,
|
10 |
+
"dropout": 0.0
|
11 |
+
},
|
12 |
+
"model_type": "text",
|
13 |
+
"parameter_count": 192989952,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 1.2919732055664062,
|
16 |
+
"torch_dtype": "bfloat16"
|
17 |
+
}
|
small-pth/coarse.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce245eb9a72303373444e5c29a9c5b2e66ed8ddd2090040543cbf794dab06b12
|
3 |
+
size 417307170
|
small-pth/coarse_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"input_vocab_size": 12096,
|
10 |
+
"output_vocab_size": 12096
|
11 |
+
},
|
12 |
+
"model_type": "coarse",
|
13 |
+
"parameter_count": 104319744,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 2.9895273513793947
|
16 |
+
}
|
small-pth/fine.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:466f5d38dcb13370b57b4d04cd767de4ecf459dff18ad15c1c01d54472285a88
|
3 |
+
size 369026191
|
small-pth/fine_model_config.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"n_codes_total": 8,
|
10 |
+
"n_codes_given": 1,
|
11 |
+
"input_vocab_size": 1056,
|
12 |
+
"output_vocab_size": 1056
|
13 |
+
},
|
14 |
+
"model_type": "fine",
|
15 |
+
"parameter_count": 97924608,
|
16 |
+
"needs_tokenizer": true,
|
17 |
+
"best_val_loss": 2.5150986952781675
|
18 |
+
}
|
small-pth/text.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aba6029a68df2997c3847690df72ef4211e00a2bdbf5043c6c6fac6c928ec51b
|
3 |
+
size 771987842
|
small-pth/text_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"input_vocab_size": 129600,
|
9 |
+
"output_vocab_size": 10048,
|
10 |
+
"dropout": 0.0
|
11 |
+
},
|
12 |
+
"model_type": "text",
|
13 |
+
"parameter_count": 192989952,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 1.2919732055664062
|
16 |
+
}
|
small/coarse.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5dd4b6e05ab7596b932b69a654765ab756b924cbe92bdad773f2508aa69cfe3
|
3 |
+
size 417286848
|
small/coarse_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"input_vocab_size": 12096,
|
10 |
+
"output_vocab_size": 12096
|
11 |
+
},
|
12 |
+
"model_type": "coarse",
|
13 |
+
"parameter_count": 104319744,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 2.9895273513793947
|
16 |
+
}
|
small/fine.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d18a2c9dacdf49fd8196a927a068286ae4f3ff7278380c5ac774eae42c2ef8dc
|
3 |
+
size 369001320
|
small/fine_model_config.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"n_codes_total": 8,
|
10 |
+
"n_codes_given": 1,
|
11 |
+
"input_vocab_size": 1056,
|
12 |
+
"output_vocab_size": 1056
|
13 |
+
},
|
14 |
+
"model_type": "fine",
|
15 |
+
"parameter_count": 97924608,
|
16 |
+
"needs_tokenizer": true,
|
17 |
+
"best_val_loss": 2.5150986952781675
|
18 |
+
}
|
small/text.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35e759a15528351dd8d1ae8da5e0b4c6f2937e46145223fde5dfd2329b49cb05
|
3 |
+
size 771967672
|
small/text_model_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_config": {
|
3 |
+
"n_layer": 12,
|
4 |
+
"n_head": 12,
|
5 |
+
"n_embd": 768,
|
6 |
+
"block_size": 1024,
|
7 |
+
"bias": false,
|
8 |
+
"input_vocab_size": 129600,
|
9 |
+
"output_vocab_size": 10048,
|
10 |
+
"dropout": 0.0
|
11 |
+
},
|
12 |
+
"model_type": "text",
|
13 |
+
"parameter_count": 192989952,
|
14 |
+
"needs_tokenizer": true,
|
15 |
+
"best_val_loss": 1.2919732055664062
|
16 |
+
}
|