hllj committed on
Commit
4c2a488
·
1 Parent(s): cbe9d92

Training in progress, step 25

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffcae6fbcd3ab48ddb27ac36a4307ce80c23389ed305f6c13af21c3774251b28
3
- size 218196746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a65b50967390cfef35f29c0c558040200984c1dc16570052ecad9979dae852d
3
+ size 872508554
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4507cecfab4aed7b850ede3e6b20862e39e7aafc5cefba9750cc95b8301e63a4
3
  size 872450448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0db23340a233d6a10240667d8ba287a38e66b15ecd1d863fee3447be11638b8
3
  size 872450448
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.03,
3
- "eval_loss": 1.1997405290603638,
4
- "eval_runtime": 112.9062,
5
  "eval_samples": 650,
6
- "eval_samples_per_second": 5.757,
7
- "eval_steps_per_second": 1.444,
8
- "train_loss": 1.4715181255340577,
9
- "train_runtime": 335.1031,
10
  "train_samples": 5845,
11
- "train_samples_per_second": 0.597,
12
- "train_steps_per_second": 0.149
13
  }
 
1
  {
2
  "epoch": 0.03,
3
+ "eval_loss": 0.8876652717590332,
4
+ "eval_runtime": 113.1958,
5
  "eval_samples": 650,
6
+ "eval_samples_per_second": 5.742,
7
+ "eval_steps_per_second": 1.44,
8
+ "train_loss": 1.0970729541778566,
9
+ "train_runtime": 344.8794,
10
  "train_samples": 5845,
11
+ "train_samples_per_second": 0.58,
12
+ "train_steps_per_second": 0.145
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.03,
3
- "eval_loss": 1.1997405290603638,
4
- "eval_runtime": 112.9062,
5
  "eval_samples": 650,
6
- "eval_samples_per_second": 5.757,
7
- "eval_steps_per_second": 1.444
8
  }
 
1
  {
2
  "epoch": 0.03,
3
+ "eval_loss": 0.8876652717590332,
4
+ "eval_runtime": 113.1958,
5
  "eval_samples": 650,
6
+ "eval_samples_per_second": 5.742,
7
+ "eval_steps_per_second": 1.44
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.03,
3
- "train_loss": 1.4715181255340577,
4
- "train_runtime": 335.1031,
5
  "train_samples": 5845,
6
- "train_samples_per_second": 0.597,
7
- "train_steps_per_second": 0.149
8
  }
 
1
  {
2
  "epoch": 0.03,
3
+ "train_loss": 1.0970729541778566,
4
+ "train_runtime": 344.8794,
5
  "train_samples": 5845,
6
+ "train_samples_per_second": 0.58,
7
+ "train_steps_per_second": 0.145
8
  }
trainer_state.json CHANGED
@@ -17,64 +17,64 @@
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 2.838778253789822e-05,
20
- "loss": 1.7137,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
  "learning_rate": 2.1314021436425026e-05,
26
- "loss": 1.5219,
27
  "step": 20
28
  },
29
  {
30
  "epoch": 0.02,
31
- "eval_loss": 1.2538621425628662,
32
- "eval_runtime": 112.7562,
33
- "eval_samples_per_second": 5.765,
34
- "eval_steps_per_second": 1.446,
35
  "step": 25
36
  },
37
  {
38
  "epoch": 0.02,
39
  "learning_rate": 1.1522697745987076e-05,
40
- "loss": 1.4272,
41
  "step": 30
42
  },
43
  {
44
  "epoch": 0.03,
45
  "learning_rate": 3.2280092208200853e-06,
46
- "loss": 1.401,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 0.03,
51
  "learning_rate": 0.0,
52
- "loss": 1.3156,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 0.03,
57
- "eval_loss": 1.1997405290603638,
58
- "eval_runtime": 113.4502,
59
- "eval_samples_per_second": 5.729,
60
- "eval_steps_per_second": 1.437,
61
  "step": 50
62
  },
63
  {
64
  "epoch": 0.03,
65
  "step": 50,
66
- "total_flos": 3919242130882560.0,
67
- "train_loss": 1.4715181255340577,
68
- "train_runtime": 335.1031,
69
- "train_samples_per_second": 0.597,
70
- "train_steps_per_second": 0.149
71
  }
72
  ],
73
  "logging_steps": 10,
74
  "max_steps": 50,
75
  "num_train_epochs": 1,
76
  "save_steps": 25,
77
- "total_flos": 3919242130882560.0,
78
  "trial_name": null,
79
  "trial_params": null
80
  }
 
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 2.838778253789822e-05,
20
+ "loss": 1.5421,
21
  "step": 10
22
  },
23
  {
24
  "epoch": 0.01,
25
  "learning_rate": 2.1314021436425026e-05,
26
+ "loss": 1.1665,
27
  "step": 20
28
  },
29
  {
30
  "epoch": 0.02,
31
+ "eval_loss": 0.9467611908912659,
32
+ "eval_runtime": 113.1765,
33
+ "eval_samples_per_second": 5.743,
34
+ "eval_steps_per_second": 1.44,
35
  "step": 25
36
  },
37
  {
38
  "epoch": 0.02,
39
  "learning_rate": 1.1522697745987076e-05,
40
+ "loss": 0.9964,
41
  "step": 30
42
  },
43
  {
44
  "epoch": 0.03,
45
  "learning_rate": 3.2280092208200853e-06,
46
+ "loss": 0.9493,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 0.03,
51
  "learning_rate": 0.0,
52
+ "loss": 0.8357,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 0.03,
57
+ "eval_loss": 0.8876652717590332,
58
+ "eval_runtime": 114.0148,
59
+ "eval_samples_per_second": 5.701,
60
+ "eval_steps_per_second": 1.43,
61
  "step": 50
62
  },
63
  {
64
  "epoch": 0.03,
65
  "step": 50,
66
+ "total_flos": 4008716634423296.0,
67
+ "train_loss": 1.0970729541778566,
68
+ "train_runtime": 344.8794,
69
+ "train_samples_per_second": 0.58,
70
+ "train_steps_per_second": 0.145
71
  }
72
  ],
73
  "logging_steps": 10,
74
  "max_steps": 50,
75
  "num_train_epochs": 1,
76
  "save_steps": 25,
77
+ "total_flos": 4008716634423296.0,
78
  "trial_name": null,
79
  "trial_params": null
80
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf7b7d3708042371320e5cc19f2093931cd4f9e404337a086c3d1743671d6c8a
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4fc530190fc50faa5c41b7996377ae6495a8c82a8243129b1546cca66170e8
3
  size 4664