Commit
·
90e0792
1
Parent(s):
0fe8392
Model save
Browse files- all_results.json +7 -7
- eval_results.json +4 -4
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699575456.ip-26-0-155-187.230765.0 +3 -0
- runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699587432.ip-26-0-155-187.230765.1 +3 -0
- train_results.json +3 -3
- trainer_state.json +16 -16
- training_args.bin +1 -1
all_results.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 0.67,
|
| 3 |
-
"eval_loss": 0.
|
| 4 |
-
"eval_runtime":
|
| 5 |
"eval_samples": 23110,
|
| 6 |
-
"eval_samples_per_second": 69.
|
| 7 |
-
"eval_steps_per_second": 0.
|
| 8 |
-
"train_loss": 0.
|
| 9 |
-
"train_runtime":
|
| 10 |
"train_samples": 207865,
|
| 11 |
-
"train_samples_per_second": 17.
|
| 12 |
"train_steps_per_second": 0.035
|
| 13 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 0.67,
|
| 3 |
+
"eval_loss": 0.932437002658844,
|
| 4 |
+
"eval_runtime": 331.0457,
|
| 5 |
"eval_samples": 23110,
|
| 6 |
+
"eval_samples_per_second": 69.809,
|
| 7 |
+
"eval_steps_per_second": 0.547,
|
| 8 |
+
"train_loss": 0.9717322877224754,
|
| 9 |
+
"train_runtime": 11645.5121,
|
| 10 |
"train_samples": 207865,
|
| 11 |
+
"train_samples_per_second": 17.849,
|
| 12 |
"train_steps_per_second": 0.035
|
| 13 |
}
|
eval_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 0.67,
|
| 3 |
-
"eval_loss": 0.
|
| 4 |
-
"eval_runtime":
|
| 5 |
"eval_samples": 23110,
|
| 6 |
-
"eval_samples_per_second": 69.
|
| 7 |
-
"eval_steps_per_second": 0.
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 0.67,
|
| 3 |
+
"eval_loss": 0.932437002658844,
|
| 4 |
+
"eval_runtime": 331.0457,
|
| 5 |
"eval_samples": 23110,
|
| 6 |
+
"eval_samples_per_second": 69.809,
|
| 7 |
+
"eval_steps_per_second": 0.547
|
| 8 |
}
|
model-00001-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4943162336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68b1e135ad66cee90fef5c9335e8c80b8e60b16254ff5f4e88d3369ebcce96a4
|
| 3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999819336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8df5a386b05f0393662aaa5d39d8dd052a22a366fe6f5cd42a0c7bc940898d6e
|
| 3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4540516344
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:202c8e90c2ac2219a17c1bce35622a063977658808c635d326433237022d2ea3
|
| 3 |
size 4540516344
|
runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699575456.ip-26-0-155-187.230765.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3189372cc8c6459ee460d3d31cbaddfb99a40696a85cc9050a23dea2a8e6a339
|
| 3 |
+
size 13430
|
runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699587432.ip-26-0-155-187.230765.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2868aa7d42743dcea1cc11cb77830e050620d5fc59cb3c0b74befa9a82dd81e
|
| 3 |
+
size 359
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 0.67,
|
| 3 |
-
"train_loss": 0.
|
| 4 |
-
"train_runtime":
|
| 5 |
"train_samples": 207865,
|
| 6 |
-
"train_samples_per_second": 17.
|
| 7 |
"train_steps_per_second": 0.035
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 0.67,
|
| 3 |
+
"train_loss": 0.9717322877224754,
|
| 4 |
+
"train_runtime": 11645.5121,
|
| 5 |
"train_samples": 207865,
|
| 6 |
+
"train_samples_per_second": 17.849,
|
| 7 |
"train_steps_per_second": 0.035
|
| 8 |
}
|
trainer_state.json
CHANGED
|
@@ -17,25 +17,25 @@
|
|
| 17 |
{
|
| 18 |
"epoch": 0.01,
|
| 19 |
"learning_rate": 1.999251652147735e-05,
|
| 20 |
-
"loss": 1.
|
| 21 |
"step": 5
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"epoch": 0.02,
|
| 25 |
"learning_rate": 1.997007728639956e-05,
|
| 26 |
-
"loss": 1.
|
| 27 |
"step": 10
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"epoch": 0.04,
|
| 31 |
"learning_rate": 1.9932715879473385e-05,
|
| 32 |
-
"loss": 1.
|
| 33 |
"step": 15
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"epoch": 0.05,
|
| 37 |
"learning_rate": 1.9880488219356086e-05,
|
| 38 |
-
"loss": 1.
|
| 39 |
"step": 20
|
| 40 |
},
|
| 41 |
{
|
|
@@ -59,7 +59,7 @@
|
|
| 59 |
{
|
| 60 |
"epoch": 0.1,
|
| 61 |
"learning_rate": 1.9524809490566878e-05,
|
| 62 |
-
"loss": 0.
|
| 63 |
"step": 40
|
| 64 |
},
|
| 65 |
{
|
|
@@ -71,7 +71,7 @@
|
|
| 71 |
{
|
| 72 |
"epoch": 0.12,
|
| 73 |
"learning_rate": 1.926084840336821e-05,
|
| 74 |
-
"loss": 0.
|
| 75 |
"step": 50
|
| 76 |
},
|
| 77 |
{
|
|
@@ -113,7 +113,7 @@
|
|
| 113 |
{
|
| 114 |
"epoch": 0.21,
|
| 115 |
"learning_rate": 1.791386494010081e-05,
|
| 116 |
-
"loss": 0.
|
| 117 |
"step": 85
|
| 118 |
},
|
| 119 |
{
|
|
@@ -125,7 +125,7 @@
|
|
| 125 |
{
|
| 126 |
"epoch": 0.23,
|
| 127 |
"learning_rate": 1.7417625312098453e-05,
|
| 128 |
-
"loss": 0.
|
| 129 |
"step": 95
|
| 130 |
},
|
| 131 |
{
|
|
@@ -143,13 +143,13 @@
|
|
| 143 |
{
|
| 144 |
"epoch": 0.27,
|
| 145 |
"learning_rate": 1.659103377877423e-05,
|
| 146 |
-
"loss": 0.
|
| 147 |
"step": 110
|
| 148 |
},
|
| 149 |
{
|
| 150 |
"epoch": 0.28,
|
| 151 |
"learning_rate": 1.629520819706912e-05,
|
| 152 |
-
"loss": 0.
|
| 153 |
"step": 115
|
| 154 |
},
|
| 155 |
{
|
|
@@ -340,9 +340,9 @@
|
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"epoch": 0.67,
|
| 343 |
-
"eval_loss": 0.
|
| 344 |
-
"eval_runtime": 337.
|
| 345 |
-
"eval_samples_per_second": 68.
|
| 346 |
"eval_steps_per_second": 0.537,
|
| 347 |
"step": 272
|
| 348 |
},
|
|
@@ -350,9 +350,9 @@
|
|
| 350 |
"epoch": 0.67,
|
| 351 |
"step": 272,
|
| 352 |
"total_flos": 455322233733120.0,
|
| 353 |
-
"train_loss": 0.
|
| 354 |
-
"train_runtime":
|
| 355 |
-
"train_samples_per_second": 17.
|
| 356 |
"train_steps_per_second": 0.035
|
| 357 |
}
|
| 358 |
],
|
|
|
|
| 17 |
{
|
| 18 |
"epoch": 0.01,
|
| 19 |
"learning_rate": 1.999251652147735e-05,
|
| 20 |
+
"loss": 1.6995,
|
| 21 |
"step": 5
|
| 22 |
},
|
| 23 |
{
|
| 24 |
"epoch": 0.02,
|
| 25 |
"learning_rate": 1.997007728639956e-05,
|
| 26 |
+
"loss": 1.1502,
|
| 27 |
"step": 10
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"epoch": 0.04,
|
| 31 |
"learning_rate": 1.9932715879473385e-05,
|
| 32 |
+
"loss": 1.0714,
|
| 33 |
"step": 15
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"epoch": 0.05,
|
| 37 |
"learning_rate": 1.9880488219356086e-05,
|
| 38 |
+
"loss": 1.0487,
|
| 39 |
"step": 20
|
| 40 |
},
|
| 41 |
{
|
|
|
|
| 59 |
{
|
| 60 |
"epoch": 0.1,
|
| 61 |
"learning_rate": 1.9524809490566878e-05,
|
| 62 |
+
"loss": 0.9805,
|
| 63 |
"step": 40
|
| 64 |
},
|
| 65 |
{
|
|
|
|
| 71 |
{
|
| 72 |
"epoch": 0.12,
|
| 73 |
"learning_rate": 1.926084840336821e-05,
|
| 74 |
+
"loss": 0.9814,
|
| 75 |
"step": 50
|
| 76 |
},
|
| 77 |
{
|
|
|
|
| 113 |
{
|
| 114 |
"epoch": 0.21,
|
| 115 |
"learning_rate": 1.791386494010081e-05,
|
| 116 |
+
"loss": 0.9642,
|
| 117 |
"step": 85
|
| 118 |
},
|
| 119 |
{
|
|
|
|
| 125 |
{
|
| 126 |
"epoch": 0.23,
|
| 127 |
"learning_rate": 1.7417625312098453e-05,
|
| 128 |
+
"loss": 0.9444,
|
| 129 |
"step": 95
|
| 130 |
},
|
| 131 |
{
|
|
|
|
| 143 |
{
|
| 144 |
"epoch": 0.27,
|
| 145 |
"learning_rate": 1.659103377877423e-05,
|
| 146 |
+
"loss": 0.9498,
|
| 147 |
"step": 110
|
| 148 |
},
|
| 149 |
{
|
| 150 |
"epoch": 0.28,
|
| 151 |
"learning_rate": 1.629520819706912e-05,
|
| 152 |
+
"loss": 0.9464,
|
| 153 |
"step": 115
|
| 154 |
},
|
| 155 |
{
|
|
|
|
| 340 |
},
|
| 341 |
{
|
| 342 |
"epoch": 0.67,
|
| 343 |
+
"eval_loss": 0.9322898387908936,
|
| 344 |
+
"eval_runtime": 337.0923,
|
| 345 |
+
"eval_samples_per_second": 68.557,
|
| 346 |
"eval_steps_per_second": 0.537,
|
| 347 |
"step": 272
|
| 348 |
},
|
|
|
|
| 350 |
"epoch": 0.67,
|
| 351 |
"step": 272,
|
| 352 |
"total_flos": 455322233733120.0,
|
| 353 |
+
"train_loss": 0.9717322877224754,
|
| 354 |
+
"train_runtime": 11645.5121,
|
| 355 |
+
"train_samples_per_second": 17.849,
|
| 356 |
"train_steps_per_second": 0.035
|
| 357 |
}
|
| 358 |
],
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5624
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c5e5f1c2e60e8566b60a42c429bdeebcc5f5392a53938ef2fe0c39224dde9fc
|
| 3 |
size 5624
|