Training in progress, step 1800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
-
# tensor([[1.0000, 0.
|
290 |
-
# [0.
|
291 |
-
# [0.
|
292 |
```
|
293 |
|
294 |
<!--
|
@@ -1256,6 +1256,10 @@ You can finetune this model on your own dataset.
|
|
1256 |
| 0.0265 | 1500 | 0.6639 |
|
1257 |
| 0.0274 | 1550 | 0.6795 |
|
1258 |
| 0.0283 | 1600 | 0.6341 |
|
|
|
|
|
|
|
|
|
1259 |
|
1260 |
|
1261 |
### Framework Versions
|
|
|
286 |
# Get the similarity scores for the embeddings
|
287 |
similarities = model.similarity(embeddings, embeddings)
|
288 |
print(similarities)
|
289 |
+
# tensor([[1.0000, 0.6240, 0.1415],
|
290 |
+
# [0.6240, 1.0000, 0.1641],
|
291 |
+
# [0.1415, 0.1641, 1.0000]])
|
292 |
```
|
293 |
|
294 |
<!--
|
|
|
1256 |
| 0.0265 | 1500 | 0.6639 |
|
1257 |
| 0.0274 | 1550 | 0.6795 |
|
1258 |
| 0.0283 | 1600 | 0.6341 |
|
1259 |
+
| 0.0292 | 1650 | 0.6031 |
|
1260 |
+
| 0.0300 | 1700 | 0.5959 |
|
1261 |
+
| 0.0309 | 1750 | 0.6412 |
|
1262 |
+
| 0.0318 | 1800 | 0.5065 |
|
1263 |
|
1264 |
|
1265 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e655f66bc39e233b9c8a58c97b97703f4aeea426a1d4cf2e42279efc281c516e
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609611
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8edb9779f991324ee4a3850ff5e072ec580ee76e1759218d64fde606def14c42
|
3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14645
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c069f50ffa5dd2b5cfe72eb0edf43eb9716de220eca4477d056e688e57407292
|
3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1383
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa0408efb69cab96d5bab9a1aaf44cedbc9fc8d34f4cef378d81605e5c026d5c
|
3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1465
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd0879a5aed9e5750d6784df10ece392dd4d2a4115b2e37d50a195c94de0560
|
3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -232,6 +232,34 @@
|
|
232 |
"learning_rate": 1.4125441696113076e-05,
|
233 |
"loss": 0.6341,
|
234 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
}
|
236 |
],
|
237 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.03180717781979467,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 1800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
232 |
"learning_rate": 1.4125441696113076e-05,
|
233 |
"loss": 0.6341,
|
234 |
"step": 1600
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"epoch": 0.02915657966814511,
|
238 |
+
"grad_norm": 1.9698214530944824,
|
239 |
+
"learning_rate": 1.456713780918728e-05,
|
240 |
+
"loss": 0.6031,
|
241 |
+
"step": 1650
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"epoch": 0.03004011238536163,
|
245 |
+
"grad_norm": 2.495985507965088,
|
246 |
+
"learning_rate": 1.5008833922261484e-05,
|
247 |
+
"loss": 0.5959,
|
248 |
+
"step": 1700
|
249 |
+
},
|
250 |
+
{
|
251 |
+
"epoch": 0.030923645102578147,
|
252 |
+
"grad_norm": 2.990360975265503,
|
253 |
+
"learning_rate": 1.545053003533569e-05,
|
254 |
+
"loss": 0.6412,
|
255 |
+
"step": 1750
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"epoch": 0.03180717781979467,
|
259 |
+
"grad_norm": 3.658212184906006,
|
260 |
+
"learning_rate": 1.5892226148409894e-05,
|
261 |
+
"loss": 0.5065,
|
262 |
+
"step": 1800
|
263 |
}
|
264 |
],
|
265 |
"logging_steps": 50,
|