rsxdalv committed on May 20

Commit

f8f08f0

verified ·

1 Parent(s): 958cbb6

Upload 36 files

Browse files

Files changed (36) hide show

big-bf16/coarse.safetensors +3 -0
big-bf16/coarse_model_config.json +17 -0
big-bf16/fine.safetensors +3 -0
big-bf16/fine_model_config.json +19 -0
big-bf16/text.safetensors +3 -0
big-bf16/text_model_config.json +17 -0
big-pth/coarse.pth +3 -0
big-pth/coarse_model_config.json +16 -0
big-pth/fine.pth +3 -0
big-pth/fine_model_config.json +18 -0
big-pth/text.pth +3 -0
big-pth/text_model_config.json +16 -0
big/coarse.safetensors +3 -0
big/coarse_model_config.json +16 -0
big/fine.safetensors +3 -0
big/fine_model_config.json +19 -0
big/text.safetensors +3 -0
big/text_model_config.json +16 -0
small-bf16/coarse.safetensors +3 -0
small-bf16/coarse_model_config.json +17 -0
small-bf16/fine.safetensors +3 -0
small-bf16/fine_model_config.json +19 -0
small-bf16/text.safetensors +3 -0
small-bf16/text_model_config.json +17 -0
small-pth/coarse.pth +3 -0
small-pth/coarse_model_config.json +16 -0
small-pth/fine.pth +3 -0
small-pth/fine_model_config.json +18 -0
small-pth/text.pth +3 -0
small-pth/text_model_config.json +16 -0
small/coarse.safetensors +3 -0
small/coarse_model_config.json +16 -0
small/fine.safetensors +3 -0
small/fine_model_config.json +18 -0
small/text.safetensors +3 -0
small/text_model_config.json +16 -0

big-bf16/coarse.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a1c1257051eaeacda3acfd833ecd5fc55c65c6e4ef15c13c75a4bae16b9fa1b
+size 655738248

big-bf16/coarse_model_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "input_vocab_size": 12096,
+    "output_vocab_size": 12096
+  },
+  "model_type": "coarse",
+  "parameter_count": 327861248,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.9014446608225506,
+  "torch_dtype": "bfloat16"
+}

big-bf16/fine.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3475ebae4c583f2188d5c92bd04f7135e1cdbf2d1f0a272009cc47fe4896611
+size 623600536

big-bf16/fine_model_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "n_codes_total": 8,
+    "n_codes_given": 1,
+    "input_vocab_size": 1056,
+    "output_vocab_size": 1056
+  },
+  "model_type": "fine",
+  "parameter_count": 319358976,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.0785889791647594,
+  "torch_dtype": "bfloat16"
+}

big-bf16/text.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fda74920a99d0f508fd931ce95a8842858f45e79a2e3e9baa24e3d7f3c12f405
+size 892192136

big-bf16/text_model_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "input_vocab_size": 129600,
+    "output_vocab_size": 10048,
+    "dropout": 0.0
+  },
+  "model_type": "text",
+  "parameter_count": 446088192,
+  "needs_tokenizer": true,
+  "best_val_loss": 1.2688672196865083,
+  "torch_dtype": "bfloat16"
+}

big-pth/coarse.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7abe483e16dfa491adafd170b3aa2569006d2326065f31e81535b0d92915915
+size 1311503218

big-pth/coarse_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "input_vocab_size": 12096,
+    "output_vocab_size": 12096
+  },
+  "model_type": "coarse",
+  "parameter_count": 327861248,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.9014446608225506
+}

big-pth/fine.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:430efa5b7bdb934ad5e2ab64f28576c89bf757b2de193a743a4aaa9d78618bf9
+size 1247225967

big-pth/fine_model_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "n_codes_total": 8,
+    "n_codes_given": 1,
+    "input_vocab_size": 1056,
+    "output_vocab_size": 1056
+  },
+  "model_type": "fine",
+  "parameter_count": 319358976,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.0785889791647594
+}

big-pth/text.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:204faeacaa40c977823c7826695e4729f09c5dea640d202b651e02ae2872deaa
+size 1784407618

big-pth/text_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "input_vocab_size": 129600,
+    "output_vocab_size": 10048,
+    "dropout": 0.0
+  },
+  "model_type": "text",
+  "parameter_count": 446088192,
+  "needs_tokenizer": true,
+  "best_val_loss": 1.2688672196865083
+}

big/coarse.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea4eb15f6b9baed2edd4684a055df96d629445c2f84dcb222b1789714dae006
+size 1311460680

big/coarse_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "input_vocab_size": 12096,
+    "output_vocab_size": 12096
+  },
+  "model_type": "coarse",
+  "parameter_count": 327861248,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.9014446608225506
+}

big/fine.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac3059d910be9eca177c5d8be60b7ffe338b18b5df3c9d2dcd81f7c496cb5ff1
+size 1247179592

big/fine_model_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "n_codes_total": 8,
+    "n_codes_given": 1,
+    "input_vocab_size": 1056,
+    "output_vocab_size": 1056
+  },
+  "model_type": "fine",
+  "torch_dtype": "float32",
+  "parameter_count": 319358976,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.0785889791647594
+}

big/text.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71e30646e1ce1aa30cb7271399599df35d73c588b8ff1e65fabd197c942fe938
+size 1784368456

big/text_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 24,
+    "n_head": 16,
+    "n_embd": 1024,
+    "block_size": 1024,
+    "bias": false,
+    "input_vocab_size": 129600,
+    "output_vocab_size": 10048,
+    "dropout": 0.0
+  },
+  "model_type": "text",
+  "parameter_count": 446088192,
+  "needs_tokenizer": true,
+  "best_val_loss": 1.2688672196865083
+}

small-bf16/coarse.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbf2c48600e87b58606c6e6d23291b3167f5fbacf30b0df6c200b460d448dcca
+size 208647384

small-bf16/coarse_model_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "input_vocab_size": 12096,
+    "output_vocab_size": 12096
+  },
+  "model_type": "coarse",
+  "parameter_count": 104319744,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.9895273513793947,
+  "torch_dtype": "bfloat16"
+}

small-bf16/fine.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c582d992123bc3cb7bd4cad0c100702d39ba98b419cc8986622f24b41642f28
+size 184506264

small-bf16/fine_model_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "n_codes_total": 8,
+    "n_codes_given": 1,
+    "input_vocab_size": 1056,
+    "output_vocab_size": 1056
+  },
+  "model_type": "fine",
+  "parameter_count": 97924608,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.5150986952781675,
+  "torch_dtype": "bfloat16"
+}

small-bf16/text.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7eccd0aff1a5ed463e4af7f546161db9382a9cbab839612671c1819559a7b1fc
+size 385987800

small-bf16/text_model_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "input_vocab_size": 129600,
+    "output_vocab_size": 10048,
+    "dropout": 0.0
+  },
+  "model_type": "text",
+  "parameter_count": 192989952,
+  "needs_tokenizer": true,
+  "best_val_loss": 1.2919732055664062,
+  "torch_dtype": "bfloat16"
+}

small-pth/coarse.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce245eb9a72303373444e5c29a9c5b2e66ed8ddd2090040543cbf794dab06b12
+size 417307170

small-pth/coarse_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "input_vocab_size": 12096,
+    "output_vocab_size": 12096
+  },
+  "model_type": "coarse",
+  "parameter_count": 104319744,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.9895273513793947
+}

small-pth/fine.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:466f5d38dcb13370b57b4d04cd767de4ecf459dff18ad15c1c01d54472285a88
+size 369026191

small-pth/fine_model_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "n_codes_total": 8,
+    "n_codes_given": 1,
+    "input_vocab_size": 1056,
+    "output_vocab_size": 1056
+  },
+  "model_type": "fine",
+  "parameter_count": 97924608,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.5150986952781675
+}

small-pth/text.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba6029a68df2997c3847690df72ef4211e00a2bdbf5043c6c6fac6c928ec51b
+size 771987842

small-pth/text_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "input_vocab_size": 129600,
+    "output_vocab_size": 10048,
+    "dropout": 0.0
+  },
+  "model_type": "text",
+  "parameter_count": 192989952,
+  "needs_tokenizer": true,
+  "best_val_loss": 1.2919732055664062
+}

small/coarse.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5dd4b6e05ab7596b932b69a654765ab756b924cbe92bdad773f2508aa69cfe3
+size 417286848

small/coarse_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "input_vocab_size": 12096,
+    "output_vocab_size": 12096
+  },
+  "model_type": "coarse",
+  "parameter_count": 104319744,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.9895273513793947
+}

small/fine.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d18a2c9dacdf49fd8196a927a068286ae4f3ff7278380c5ac774eae42c2ef8dc
+size 369001320

small/fine_model_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "dropout": 0.0,
+    "n_codes_total": 8,
+    "n_codes_given": 1,
+    "input_vocab_size": 1056,
+    "output_vocab_size": 1056
+  },
+  "model_type": "fine",
+  "parameter_count": 97924608,
+  "needs_tokenizer": true,
+  "best_val_loss": 2.5150986952781675
+}

small/text.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35e759a15528351dd8d1ae8da5e0b4c6f2937e46145223fde5dfd2329b49cb05
+size 771967672

small/text_model_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "model_config": {
+    "n_layer": 12,
+    "n_head": 12,
+    "n_embd": 768,
+    "block_size": 1024,
+    "bias": false,
+    "input_vocab_size": 129600,
+    "output_vocab_size": 10048,
+    "dropout": 0.0
+  },
+  "model_type": "text",
+  "parameter_count": 192989952,
+  "needs_tokenizer": true,
+  "best_val_loss": 1.2919732055664062
+}