pretrain core 0
Browse files
scripts/pretrain-core-model-0.yaml
CHANGED
|
@@ -110,29 +110,29 @@ eval:
|
|
| 110 |
|
| 111 |
# Optimizer-related arguments
|
| 112 |
|
| 113 |
-
# optimizer:
|
| 114 |
-
# # class_path: torch.optim.AdamW
|
| 115 |
-
# class_path: torchao.prototype.low_bit_optim.AdamW8bit
|
| 116 |
-
# # class_path: torchao.prototype.low_bit_optim.AdamW4bit
|
| 117 |
-
# # class_path: bitsandbytes.optim.AdamW8bit
|
| 118 |
-
# # class_path: bitsandbytes.optim.PagedAdamW8bit
|
| 119 |
-
# init_args:
|
| 120 |
-
# # (type: float, default: 0.001)
|
| 121 |
-
# lr: 1e-4
|
| 122 |
-
# # (type: float, default: 0.01)
|
| 123 |
-
# weight_decay: 0.01
|
| 124 |
-
# # (type: tuple, default: (0.9,0.999))
|
| 125 |
-
# betas:
|
| 126 |
-
# - 0.9
|
| 127 |
-
# - 0.99
|
| 128 |
-
|
| 129 |
optimizer:
|
| 130 |
-
class_path: dolphinflow.DolphinFlow
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
init_args:
|
|
|
|
| 132 |
lr: 1e-4
|
|
|
|
| 133 |
weight_decay: 0.01
|
| 134 |
-
momentum: 0.9
|
| 135 |
-
nesterov: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
# How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
|
| 138 |
devices: auto
|
|
|
|
| 110 |
|
| 111 |
# Optimizer-related arguments
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
optimizer:
|
| 114 |
+
class_path: torch.optim.AdamW
|
| 115 |
+
# class_path: torchao.prototype.low_bit_optim.AdamW8bit
|
| 116 |
+
# class_path: torchao.prototype.low_bit_optim.AdamW4bit
|
| 117 |
+
# class_path: bitsandbytes.optim.AdamW8bit
|
| 118 |
+
# class_path: bitsandbytes.optim.PagedAdamW8bit
|
| 119 |
init_args:
|
| 120 |
+
# (type: float, default: 0.001)
|
| 121 |
lr: 1e-4
|
| 122 |
+
# (type: float, default: 0.01)
|
| 123 |
weight_decay: 0.01
|
| 124 |
+
# (type: tuple, default: (0.9,0.999))
|
| 125 |
+
betas:
|
| 126 |
+
- 0.9
|
| 127 |
+
- 0.999
|
| 128 |
+
|
| 129 |
+
# optimizer:
|
| 130 |
+
# class_path: dolphinflow.DolphinFlow
|
| 131 |
+
# init_args:
|
| 132 |
+
# lr: 1e-4
|
| 133 |
+
# weight_decay: 0.01
|
| 134 |
+
# momentum: 0.9
|
| 135 |
+
# nesterov: true
|
| 136 |
|
| 137 |
# How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
|
| 138 |
devices: auto
|