tangledgroup
/

tangled-alpha-0.3-core

@@ -61,9 +61,11 @@ train:
   global_batch_size: 512
   # global_batch_size: 256
   # global_batch_size: 128
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size: 4
   # micro_batch_size: 2
   # micro_batch_size: 1

   global_batch_size: 512
   # global_batch_size: 256
   # global_batch_size: 128
+  # global_batch_size: 64
   # Number of samples per data-parallel rank (type: int, default: 4)
+  micro_batch_size: 6
+  # micro_batch_size: 4
   # micro_batch_size: 2
   # micro_batch_size: 1

pretrain core 0