jakegrigsby committed on
Commit
033b28d
·
verified ·
1 Parent(s): 02dccd4

Upload folder using huggingface_hub

Browse files
Files changed (40) hide show
  1. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_0.pt +3 -0
  2. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_10.pt +3 -0
  3. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_12.pt +3 -0
  4. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_14.pt +3 -0
  5. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_16.pt +3 -0
  6. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_18.pt +3 -0
  7. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_2.pt +3 -0
  8. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_20.pt +3 -0
  9. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_21.pt +3 -0
  10. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_22.pt +3 -0
  11. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_23.pt +3 -0
  12. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_24.pt +3 -0
  13. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_25.pt +3 -0
  14. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_26.pt +3 -0
  15. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_27.pt +3 -0
  16. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_28.pt +3 -0
  17. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_29.pt +3 -0
  18. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_30.pt +3 -0
  19. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_31.pt +3 -0
  20. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_32.pt +3 -0
  21. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_33.pt +3 -0
  22. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_34.pt +3 -0
  23. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_35.pt +3 -0
  24. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_36.pt +3 -0
  25. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_37.pt +3 -0
  26. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_38.pt +3 -0
  27. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_39.pt +3 -0
  28. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_4.pt +3 -0
  29. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_40.pt +3 -0
  30. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_41.pt +3 -0
  31. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_42.pt +3 -0
  32. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_43.pt +3 -0
  33. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_44.pt +3 -0
  34. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_46.pt +3 -0
  35. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_47.pt +3 -0
  36. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_48.pt +3 -0
  37. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_49.pt +3 -0
  38. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_6.pt +3 -0
  39. synthetic-rl-v2/ckpts/policy_weights/policy_epoch_8.pt +3 -0
  40. synthetic-rl-v2/config.txt +121 -0
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2837b29174ccca0ac7c2fba9782f849bc1e7b636649209fa7d0f8574eab7e469
3
+ size 803723798
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02e2cbae9927e027d472c9a5da3e0f3a55b7bb9dd033792a2c468798d3900ed4
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9a6c8de36ee68dcc9dd35ef38f989b36fa25281ad7de4e03ba4f08d9b42322
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a36d5da68aaa34aa52e1b07ed1a3fea0f71861dc53257c5d26fb4090620bc3a
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3199df41a9dc15590184842bd3b78c2cb5668e67905f1df7bb94f54ade96244
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_18.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e972bc07b0862b2b011daf946405ea29f0e30520715a57ddf88b9cba8036a5
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da969af4f1f653575e91d7c4319befe976bb0b4d7f9db109aed82821f4b2df2a
3
+ size 803723798
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17fd8c51139729aa80ccf6e01b41495aebdea2309ecc481a0d39dbb2d2e1ee04
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_21.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d1e741e49814c0d760e148318e7b03817bf22a14aeb0d224a68b5aede6cb18f
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_22.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a40fe9c86330fe6f242b0d09e468fc31e27b0522747660a8fc6161b78f1075ab
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_23.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95a3facffe6ce7f9681322120c4f123317f60f3eee81051a8ee1a176a33a3113
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_24.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e1f091535ec713c0c5ebe12684137c3f380e798395a6a6457383e747470060
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_25.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04508f25347cf931189ebe3040e93d4a98deb1692894034f80d561e3f44b6cb
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_26.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc2153c1eeb8e92732b482ae7cde97e0eb24378c891eded364c36ed5e3759add
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_27.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5011b360ba80cfca1c9827f1237019bdf1e5900ee6f0ce234310e84945898bd6
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_28.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31e1086814ed137d99ecc34c68cde3aaeb827b2b99524be1bf7026f84559b86d
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_29.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:096ea15aeb6fcbf8ac0ecaae0338962a7cc9a5401d13a47854bbb46cf5ba3b51
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_30.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e6a1803826656ea20e41360180332cfcd33968b0a84846b84cf55d250d6990
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_31.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:716d9da6ce140584fac073e3bc40645dbca03ff001c2fbe4f52f589a4876f2c6
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f290520587dcd1911c0f03ebd6063fa926801254adb06d1351152614141bf0
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_33.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf49d2f432482ee7842282913e2ee5ae2e2dd26a545b2e95c5e23c31610db6bc
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_34.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fce0ae91e87b2cdd29167169439287d4921ea29f7090029db3010989b3f8f2
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_35.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:807a2053afe50b011284279b0bbe88c4f1319261db17c3db6910d00278360325
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_36.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffb57e976c6f7235907e3e866354b2ab6cb0d74baec13a09eb2e31b9bffaa40
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_37.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c32b98957f896256f3680e53892ebea42df1eb9324a0c0f4fd5b7c11ca156592
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_38.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d9b27e767d15cb5026e68847832ef2ff025fa11678dbe34d0a3b154d5b7c55
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_39.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b29e4740e3f3885599657853ca679eb105164d786d9e4056fd188f2db43853
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c4040c89fc92bc1e922717b2fc1b16002c9d869c83df2f0220e7c773be80ba
3
+ size 803723798
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19072cf66a7a203be72273c2cabf4600eed4f9dd88c94b459d3c84c163461dce
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_41.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9e05620619765bec332b7b0ae881bfde907d326b81e9eeedbc8c54bd435fb7
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_42.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:143e7875e62b41eeb818b9f3c689e4d41886050a84b5ab9139a0a98a1efc3e22
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_43.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d8081e208f29d6451bc94853c18bd8b56d62b3833b38a945f59fb326ecd7c66
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_44.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ddbd9c2a6ce721bdbb54940f758453ec4a2b6784a4036d5375855fa0699979
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_46.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:654fc67998610db293d3839bb2810ce12203ceb7e887330c41ff6a90c0953476
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_47.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f5b8737f9df0d21c562c57dad58b607db17bb39531bc0a63fa6a3e5a206c85
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_48.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbbca4dd6a8f84936f438ee8f2f621be107df3612639b1c78ea7c88fd3d42c04
3
+ size 803724162
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_49.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfdfedcc5f5422c7a3e02db3eb417bd2144dded484eb1835bb31181c0d3e313e
3
+ size 590839808
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca3ee653811f9aabe4855a4571b771e874c9cd0f2d279341cb3f79c4329b3315
3
+ size 803723798
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee24ac9631058163df5b1eba54a257457add45b67e7d95eccd09856cd8f16f79
3
+ size 803723798
synthetic-rl-v2/config.txt ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters for Actor:
2
+ # ==============================================================================
3
+ Actor.activation = 'leaky_relu'
4
+ Actor.cont_dist_kind = 'normal'
5
+ Actor.d_hidden = 512
6
+ Actor.dropout_p = 0.0
7
+ Actor.gmm_modes = 5
8
+ Actor.log_std_high = 2.0
9
+ Actor.log_std_low = -5.0
10
+ Actor.n_layers = 2
11
+
12
+ # Parameters for Agent:
13
+ # ==============================================================================
14
+ Agent.fake_filter = False
15
+ Agent.gamma = 0.999
16
+ Agent.num_critics = 6
17
+ Agent.num_critics_td = 2
18
+ Agent.popart = True
19
+ Agent.reward_multiplier = 10.0
20
+ Agent.tau = 0.003
21
+ Agent.use_multigamma = True
22
+ Agent.use_target_actor = True
23
+
24
+ # Parameters for Experiment:
25
+ # ==============================================================================
26
+ Experiment.batches_per_update = 1
27
+ Experiment.critic_loss_weight = 10.0
28
+ Experiment.env_mode = 'async'
29
+ Experiment.force_reset_train_envs_every = None
30
+ Experiment.grad_clip = 1.0
31
+ Experiment.has_replay_buffer_rights = True
32
+ Experiment.l2_coeff = 0.001
33
+ Experiment.learning_rate = 0.0001
34
+ Experiment.local_time_optimizer = False
35
+ Experiment.lr_warmup_steps = 500
36
+ Experiment.mixed_precision = 'no'
37
+ Experiment.padded_sampling = 'none'
38
+ Experiment.save_trajs_as = 'npz'
39
+ Experiment.stagger_traj_file_lengths = True
40
+ Experiment.wandb_group_name = None
41
+
42
+ # Parameters for FlashAttention:
43
+ # ==============================================================================
44
+ FlashAttention.window_size = (-1, -1)
45
+
46
+ # Parameters for MetamonTstepEncoder:
47
+ # ==============================================================================
48
+ MetamonTstepEncoder.d_model = 160
49
+ MetamonTstepEncoder.extra_emb_dim = 18
50
+ MetamonTstepEncoder.n_heads = 8
51
+ MetamonTstepEncoder.n_layers = 5
52
+ MetamonTstepEncoder.scratch_tokens = 11
53
+ MetamonTstepEncoder.token_mask_aug = False
54
+
55
+ # Parameters for Multigammas:
56
+ # ==============================================================================
57
+ Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
58
+ Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
59
+
60
+ # Parameters for MultiModalEmbedding:
61
+ # ==============================================================================
62
+ MultiModalEmbedding.dropout = 0.05
63
+ MultiModalEmbedding.numerical_tokens = 6
64
+
65
+ # Parameters for MultiTaskAgent:
66
+ # ==============================================================================
67
+ MultiTaskAgent.fbc_filter_k = 3
68
+ MultiTaskAgent.offline_coeff = 1.0
69
+ MultiTaskAgent.online_coeff = 0.0
70
+
71
+ # Parameters for NCritics:
72
+ # ==============================================================================
73
+ NCritics.activation = 'leaky_relu'
74
+ NCritics.d_hidden = 512
75
+ NCritics.dropout_p = 0.0
76
+ NCritics.n_layers = 2
77
+
78
+ # Parameters for NCriticsTwoHot:
79
+ # ==============================================================================
80
+ NCriticsTwoHot.activation = 'leaky_relu'
81
+ NCriticsTwoHot.d_hidden = 512
82
+ NCriticsTwoHot.dropout_p = 0.0
83
+ NCriticsTwoHot.max_return = 1100
84
+ NCriticsTwoHot.min_return = -1100
85
+ NCriticsTwoHot.n_layers = 2
86
+ NCriticsTwoHot.output_bins = 96
87
+ NCriticsTwoHot.use_symlog = False
88
+
89
+ # Parameters for PopArtLayer:
90
+ # ==============================================================================
91
+ PopArtLayer.beta = 0.0005
92
+ PopArtLayer.init_nu = 100.0
93
+
94
+ # Parameters for TformerTrajEncoder:
95
+ # ==============================================================================
96
+ TformerTrajEncoder.activation = 'leaky_relu'
97
+ TformerTrajEncoder.causal = True
98
+ TformerTrajEncoder.d_ff = 5120
99
+ TformerTrajEncoder.d_model = 1280
100
+ TformerTrajEncoder.dropout_attn = 0.0
101
+ TformerTrajEncoder.dropout_emb = 0.05
102
+ TformerTrajEncoder.dropout_ff = 0.05
103
+ TformerTrajEncoder.dropout_qkv = 0.0
104
+ TformerTrajEncoder.head_scaling = True
105
+ TformerTrajEncoder.n_heads = 20
106
+ TformerTrajEncoder.n_layers = 9
107
+ TformerTrajEncoder.norm = 'layer'
108
+ TformerTrajEncoder.normformer_norms = True
109
+ TformerTrajEncoder.sigma_reparam = True
110
+
111
+ # Parameters for TimestepTransformer:
112
+ # ==============================================================================
113
+ # None.
114
+
115
+ # Parameters for TokenEmbedding:
116
+ # ==============================================================================
117
+ # None.
118
+
119
+ # Parameters for TransformerTurnEmbedding:
120
+ # ==============================================================================
121
+ TransformerTurnEmbedding.dropout = 0.05