Upload folder using huggingface_hub
Browse files- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_0.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_10.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_12.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_14.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_16.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_18.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_2.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_20.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_21.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_22.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_23.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_24.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_25.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_26.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_27.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_28.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_29.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_30.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_31.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_32.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_33.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_34.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_35.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_36.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_37.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_38.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_39.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_4.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_40.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_41.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_42.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_43.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_44.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_46.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_47.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_48.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_49.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_6.pt +3 -0
- synthetic-rl-v2/ckpts/policy_weights/policy_epoch_8.pt +3 -0
- synthetic-rl-v2/config.txt +121 -0
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2837b29174ccca0ac7c2fba9782f849bc1e7b636649209fa7d0f8574eab7e469
|
3 |
+
size 803723798
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_10.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02e2cbae9927e027d472c9a5da3e0f3a55b7bb9dd033792a2c468798d3900ed4
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_12.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a9a6c8de36ee68dcc9dd35ef38f989b36fa25281ad7de4e03ba4f08d9b42322
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_14.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a36d5da68aaa34aa52e1b07ed1a3fea0f71861dc53257c5d26fb4090620bc3a
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_16.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3199df41a9dc15590184842bd3b78c2cb5668e67905f1df7bb94f54ade96244
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_18.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46e972bc07b0862b2b011daf946405ea29f0e30520715a57ddf88b9cba8036a5
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_2.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da969af4f1f653575e91d7c4319befe976bb0b4d7f9db109aed82821f4b2df2a
|
3 |
+
size 803723798
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_20.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17fd8c51139729aa80ccf6e01b41495aebdea2309ecc481a0d39dbb2d2e1ee04
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_21.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d1e741e49814c0d760e148318e7b03817bf22a14aeb0d224a68b5aede6cb18f
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_22.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a40fe9c86330fe6f242b0d09e468fc31e27b0522747660a8fc6161b78f1075ab
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_23.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95a3facffe6ce7f9681322120c4f123317f60f3eee81051a8ee1a176a33a3113
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_24.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15e1f091535ec713c0c5ebe12684137c3f380e798395a6a6457383e747470060
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_25.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a04508f25347cf931189ebe3040e93d4a98deb1692894034f80d561e3f44b6cb
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_26.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc2153c1eeb8e92732b482ae7cde97e0eb24378c891eded364c36ed5e3759add
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_27.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5011b360ba80cfca1c9827f1237019bdf1e5900ee6f0ce234310e84945898bd6
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31e1086814ed137d99ecc34c68cde3aaeb827b2b99524be1bf7026f84559b86d
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_29.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:096ea15aeb6fcbf8ac0ecaae0338962a7cc9a5401d13a47854bbb46cf5ba3b51
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_30.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08e6a1803826656ea20e41360180332cfcd33968b0a84846b84cf55d250d6990
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_31.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:716d9da6ce140584fac073e3bc40645dbca03ff001c2fbe4f52f589a4876f2c6
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_32.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18f290520587dcd1911c0f03ebd6063fa926801254adb06d1351152614141bf0
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_33.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf49d2f432482ee7842282913e2ee5ae2e2dd26a545b2e95c5e23c31610db6bc
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_34.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06fce0ae91e87b2cdd29167169439287d4921ea29f7090029db3010989b3f8f2
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_35.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:807a2053afe50b011284279b0bbe88c4f1319261db17c3db6910d00278360325
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_36.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ffb57e976c6f7235907e3e866354b2ab6cb0d74baec13a09eb2e31b9bffaa40
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_37.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c32b98957f896256f3680e53892ebea42df1eb9324a0c0f4fd5b7c11ca156592
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_38.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7d9b27e767d15cb5026e68847832ef2ff025fa11678dbe34d0a3b154d5b7c55
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_39.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64b29e4740e3f3885599657853ca679eb105164d786d9e4056fd188f2db43853
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_4.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7c4040c89fc92bc1e922717b2fc1b16002c9d869c83df2f0220e7c773be80ba
|
3 |
+
size 803723798
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_40.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19072cf66a7a203be72273c2cabf4600eed4f9dd88c94b459d3c84c163461dce
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_41.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff9e05620619765bec332b7b0ae881bfde907d326b81e9eeedbc8c54bd435fb7
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_42.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:143e7875e62b41eeb818b9f3c689e4d41886050a84b5ab9139a0a98a1efc3e22
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_43.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d8081e208f29d6451bc94853c18bd8b56d62b3833b38a945f59fb326ecd7c66
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_44.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5ddbd9c2a6ce721bdbb54940f758453ec4a2b6784a4036d5375855fa0699979
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_46.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:654fc67998610db293d3839bb2810ce12203ceb7e887330c41ff6a90c0953476
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_47.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3f5b8737f9df0d21c562c57dad58b607db17bb39531bc0a63fa6a3e5a206c85
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_48.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbbca4dd6a8f84936f438ee8f2f621be107df3612639b1c78ea7c88fd3d42c04
|
3 |
+
size 803724162
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_49.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfdfedcc5f5422c7a3e02db3eb417bd2144dded484eb1835bb31181c0d3e313e
|
3 |
+
size 590839808
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_6.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca3ee653811f9aabe4855a4571b771e874c9cd0f2d279341cb3f79c4329b3315
|
3 |
+
size 803723798
|
synthetic-rl-v2/ckpts/policy_weights/policy_epoch_8.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee24ac9631058163df5b1eba54a257457add45b67e7d95eccd09856cd8f16f79
|
3 |
+
size 803723798
|
synthetic-rl-v2/config.txt
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Parameters for Actor:
|
2 |
+
# ==============================================================================
|
3 |
+
Actor.activation = 'leaky_relu'
|
4 |
+
Actor.cont_dist_kind = 'normal'
|
5 |
+
Actor.d_hidden = 512
|
6 |
+
Actor.dropout_p = 0.0
|
7 |
+
Actor.gmm_modes = 5
|
8 |
+
Actor.log_std_high = 2.0
|
9 |
+
Actor.log_std_low = -5.0
|
10 |
+
Actor.n_layers = 2
|
11 |
+
|
12 |
+
# Parameters for Agent:
|
13 |
+
# ==============================================================================
|
14 |
+
Agent.fake_filter = False
|
15 |
+
Agent.gamma = 0.999
|
16 |
+
Agent.num_critics = 6
|
17 |
+
Agent.num_critics_td = 2
|
18 |
+
Agent.popart = True
|
19 |
+
Agent.reward_multiplier = 10.0
|
20 |
+
Agent.tau = 0.003
|
21 |
+
Agent.use_multigamma = True
|
22 |
+
Agent.use_target_actor = True
|
23 |
+
|
24 |
+
# Parameters for Experiment:
|
25 |
+
# ==============================================================================
|
26 |
+
Experiment.batches_per_update = 1
|
27 |
+
Experiment.critic_loss_weight = 10.0
|
28 |
+
Experiment.env_mode = 'async'
|
29 |
+
Experiment.force_reset_train_envs_every = None
|
30 |
+
Experiment.grad_clip = 1.0
|
31 |
+
Experiment.has_replay_buffer_rights = True
|
32 |
+
Experiment.l2_coeff = 0.001
|
33 |
+
Experiment.learning_rate = 0.0001
|
34 |
+
Experiment.local_time_optimizer = False
|
35 |
+
Experiment.lr_warmup_steps = 500
|
36 |
+
Experiment.mixed_precision = 'no'
|
37 |
+
Experiment.padded_sampling = 'none'
|
38 |
+
Experiment.save_trajs_as = 'npz'
|
39 |
+
Experiment.stagger_traj_file_lengths = True
|
40 |
+
Experiment.wandb_group_name = None
|
41 |
+
|
42 |
+
# Parameters for FlashAttention:
|
43 |
+
# ==============================================================================
|
44 |
+
FlashAttention.window_size = (-1, -1)
|
45 |
+
|
46 |
+
# Parameters for MetamonTstepEncoder:
|
47 |
+
# ==============================================================================
|
48 |
+
MetamonTstepEncoder.d_model = 160
|
49 |
+
MetamonTstepEncoder.extra_emb_dim = 18
|
50 |
+
MetamonTstepEncoder.n_heads = 8
|
51 |
+
MetamonTstepEncoder.n_layers = 5
|
52 |
+
MetamonTstepEncoder.scratch_tokens = 11
|
53 |
+
MetamonTstepEncoder.token_mask_aug = False
|
54 |
+
|
55 |
+
# Parameters for Multigammas:
|
56 |
+
# ==============================================================================
|
57 |
+
Multigammas.continuous = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
|
58 |
+
Multigammas.discrete = [0.1, 0.9, 0.95, 0.97, 0.99, 0.995]
|
59 |
+
|
60 |
+
# Parameters for MultiModalEmbedding:
|
61 |
+
# ==============================================================================
|
62 |
+
MultiModalEmbedding.dropout = 0.05
|
63 |
+
MultiModalEmbedding.numerical_tokens = 6
|
64 |
+
|
65 |
+
# Parameters for MultiTaskAgent:
|
66 |
+
# ==============================================================================
|
67 |
+
MultiTaskAgent.fbc_filter_k = 3
|
68 |
+
MultiTaskAgent.offline_coeff = 1.0
|
69 |
+
MultiTaskAgent.online_coeff = 0.0
|
70 |
+
|
71 |
+
# Parameters for NCritics:
|
72 |
+
# ==============================================================================
|
73 |
+
NCritics.activation = 'leaky_relu'
|
74 |
+
NCritics.d_hidden = 512
|
75 |
+
NCritics.dropout_p = 0.0
|
76 |
+
NCritics.n_layers = 2
|
77 |
+
|
78 |
+
# Parameters for NCriticsTwoHot:
|
79 |
+
# ==============================================================================
|
80 |
+
NCriticsTwoHot.activation = 'leaky_relu'
|
81 |
+
NCriticsTwoHot.d_hidden = 512
|
82 |
+
NCriticsTwoHot.dropout_p = 0.0
|
83 |
+
NCriticsTwoHot.max_return = 1100
|
84 |
+
NCriticsTwoHot.min_return = -1100
|
85 |
+
NCriticsTwoHot.n_layers = 2
|
86 |
+
NCriticsTwoHot.output_bins = 96
|
87 |
+
NCriticsTwoHot.use_symlog = False
|
88 |
+
|
89 |
+
# Parameters for PopArtLayer:
|
90 |
+
# ==============================================================================
|
91 |
+
PopArtLayer.beta = 0.0005
|
92 |
+
PopArtLayer.init_nu = 100.0
|
93 |
+
|
94 |
+
# Parameters for TformerTrajEncoder:
|
95 |
+
# ==============================================================================
|
96 |
+
TformerTrajEncoder.activation = 'leaky_relu'
|
97 |
+
TformerTrajEncoder.causal = True
|
98 |
+
TformerTrajEncoder.d_ff = 5120
|
99 |
+
TformerTrajEncoder.d_model = 1280
|
100 |
+
TformerTrajEncoder.dropout_attn = 0.0
|
101 |
+
TformerTrajEncoder.dropout_emb = 0.05
|
102 |
+
TformerTrajEncoder.dropout_ff = 0.05
|
103 |
+
TformerTrajEncoder.dropout_qkv = 0.0
|
104 |
+
TformerTrajEncoder.head_scaling = True
|
105 |
+
TformerTrajEncoder.n_heads = 20
|
106 |
+
TformerTrajEncoder.n_layers = 9
|
107 |
+
TformerTrajEncoder.norm = 'layer'
|
108 |
+
TformerTrajEncoder.normformer_norms = True
|
109 |
+
TformerTrajEncoder.sigma_reparam = True
|
110 |
+
|
111 |
+
# Parameters for TimestepTransformer:
|
112 |
+
# ==============================================================================
|
113 |
+
# None.
|
114 |
+
|
115 |
+
# Parameters for TokenEmbedding:
|
116 |
+
# ==============================================================================
|
117 |
+
# None.
|
118 |
+
|
119 |
+
# Parameters for TransformerTurnEmbedding:
|
120 |
+
# ==============================================================================
|
121 |
+
TransformerTurnEmbedding.dropout = 0.05
|