Muennighoff commited on
Commit
076928f
·
1 Parent(s): afa9fd2
Files changed (30) hide show
  1. 2b855b50c4py/evaluation/2b855b50c4py_0_babi.json +22 -0
  2. 2b855b50c4py/evaluation/2b855b50c4py_1_babi.json +22 -0
  3. 2b855b50c4py/evaluation/2b855b50c4py_2_babi.json +22 -0
  4. 2b855b50c4py/evaluation/2b855b50c4py_3_babi.json +22 -0
  5. 2b855b50c4py/evaluation/2b855b50c4py_4_babi.json +22 -0
  6. 2b855b50c4py/evaluation/2b855b50c4py_5_babi.json +4 -4
  7. 2b855b60c4py/evaluation/2b855b60c4py_0_babi.json +22 -0
  8. 2b855b60c4py/evaluation/2b855b60c4py_1_babi.json +22 -0
  9. 2b855b60c4py/evaluation/2b855b60c4py_2_babi.json +22 -0
  10. 2b855b60c4py/evaluation/2b855b60c4py_3_babi.json +22 -0
  11. 2b855b60c4py/evaluation/2b855b60c4py_4_babi.json +22 -0
  12. 2b855b60c4py/evaluation/2b855b60c4py_5_babi.json +4 -4
  13. 2b855b70c4py/evaluation/2b855b70c4py_0_babi.json +22 -0
  14. 2b855b70c4py/evaluation/2b855b70c4py_1_babi.json +22 -0
  15. 2b855b70c4py/evaluation/2b855b70c4py_2_babi.json +22 -0
  16. 2b855b70c4py/evaluation/2b855b70c4py_3_babi.json +22 -0
  17. 2b855b70c4py/evaluation/2b855b70c4py_4_babi.json +22 -0
  18. 2b855b70c4py/evaluation/2b855b70c4py_5_babi.json +4 -4
  19. 2b855b80c4py/evaluation/2b855b80c4py_0_babi.json +22 -0
  20. 2b855b80c4py/evaluation/2b855b80c4py_1_babi.json +22 -0
  21. 2b855b80c4py/evaluation/2b855b80c4py_2_babi.json +22 -0
  22. 2b855b80c4py/evaluation/2b855b80c4py_3_babi.json +22 -0
  23. 2b855b80c4py/evaluation/2b855b80c4py_4_babi.json +22 -0
  24. 2b855b80c4py/evaluation/2b855b80c4py_5_babi.json +4 -4
  25. 2b855b90c4py/evaluation/2b855b90c4py_0_babi.json +22 -0
  26. 2b855b90c4py/evaluation/2b855b90c4py_1_babi.json +22 -0
  27. 2b855b90c4py/evaluation/2b855b90c4py_2_babi.json +22 -0
  28. 2b855b90c4py/evaluation/2b855b90c4py_3_babi.json +22 -0
  29. 2b855b90c4py/evaluation/2b855b90c4py_4_babi.json +22 -0
  30. 2b855b90c4py/evaluation/2b855b90c4py_5_babi.json +4 -4
2b855b50c4py/evaluation/2b855b50c4py_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b50c4py/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50c4py/evaluation/2b855b50c4py_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.06666666666666667,
5
+ "em_stderr": 0.004554959563627512
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b50c4py/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50c4py/evaluation/2b855b50c4py_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.14633333333333334,
5
+ "em_stderr": 0.006453979205019232
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b50c4py/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50c4py/evaluation/2b855b50c4py_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.2,
5
+ "em_stderr": 0.007304184899016065
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b50c4py/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50c4py/evaluation/2b855b50c4py_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.235,
5
+ "em_stderr": 0.007742414459363012
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b50c4py/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50c4py/evaluation/2b855b50c4py_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.26600441501103755,
5
- "em_stderr": 0.014688141802812468
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.25866666666666666,
5
+ "em_stderr": 0.007996294203008543
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b60c4py/evaluation/2b855b60c4py_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b60c4py/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60c4py/evaluation/2b855b60c4py_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.07366666666666667,
5
+ "em_stderr": 0.0047701377724078675
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b60c4py/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60c4py/evaluation/2b855b60c4py_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.14433333333333334,
5
+ "em_stderr": 0.00641722689389614
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b60c4py/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60c4py/evaluation/2b855b60c4py_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.193,
5
+ "em_stderr": 0.007206546429264379
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b60c4py/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60c4py/evaluation/2b855b60c4py_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.23066666666666666,
5
+ "em_stderr": 0.007692392962423979
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b60c4py/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60c4py/evaluation/2b855b60c4py_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.2560706401766004,
5
- "em_stderr": 0.014508464567494349
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.24633333333333332,
5
+ "em_stderr": 0.00786797575120501
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b70c4py/evaluation/2b855b70c4py_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70c4py/evaluation/2b855b70c4py_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.071,
5
+ "em_stderr": 0.004689740350472808
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70c4py/evaluation/2b855b70c4py_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.149,
5
+ "em_stderr": 0.006502340102527321
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70c4py/evaluation/2b855b70c4py_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.19233333333333333,
5
+ "em_stderr": 0.007197060045498059
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70c4py/evaluation/2b855b70c4py_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.21133333333333335,
5
+ "em_stderr": 0.007454911831939557
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70c4py/evaluation/2b855b70c4py_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.2229580573951435,
5
- "em_stderr": 0.013835967302718943
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.22633333333333333,
5
+ "em_stderr": 0.007641224680860716
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b80c4py/evaluation/2b855b80c4py_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80c4py/evaluation/2b855b80c4py_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.071,
5
+ "em_stderr": 0.004689740350472808
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80c4py/evaluation/2b855b80c4py_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.149,
5
+ "em_stderr": 0.006502340102527321
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80c4py/evaluation/2b855b80c4py_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.19233333333333333,
5
+ "em_stderr": 0.007197060045498059
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80c4py/evaluation/2b855b80c4py_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.21133333333333335,
5
+ "em_stderr": 0.007454911831939557
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b70c4py/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80c4py/evaluation/2b855b80c4py_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.2229580573951435,
5
- "em_stderr": 0.013835967302718943
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.22633333333333333,
5
+ "em_stderr": 0.007641224680860716
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b90c4py/evaluation/2b855b90c4py_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b90c4py/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90c4py/evaluation/2b855b90c4py_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.012,
5
+ "em_stderr": 0.0019882951925181257
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b90c4py/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90c4py/evaluation/2b855b90c4py_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.03866666666666667,
5
+ "em_stderr": 0.0035206036511572203
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b90c4py/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90c4py/evaluation/2b855b90c4py_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.06566666666666666,
5
+ "em_stderr": 0.004523089426985369
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b90c4py/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90c4py/evaluation/2b855b90c4py_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.07433333333333333,
5
+ "em_stderr": 0.004789948938479642
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-c4py/2b855b90c4py/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90c4py/evaluation/2b855b90c4py_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.07836644591611479,
5
- "em_stderr": 0.008933464682765763
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.08333333333333333,
5
+ "em_stderr": 0.005046925147795103
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }