Upload ipt_fineinstructions_all_exp_chat_100b/config.yaml with huggingface_hub

Browse files

Files changed (1) hide show

ipt_fineinstructions_all_exp_chat_100b/config.yaml +113 -0

ipt_fineinstructions_all_exp_chat_100b/config.yaml ADDED Viewed

	@@ -0,0 +1,113 @@

+name: fineinstructions_ipt_fineinstructions_all_exp_chat_100b
+dump_dir: /fsx/craffel/fineinstructions/pretraining/ipt_fineinstructions_all_exp_chat_100b/  # checkpoint.path is this directory + "checkpoints"
+seed: 777  # top-level seed; data.seed and model.seed are set separately (both 42)
+grad_acc_steps: 8  # presumably micro-batches accumulated per optimizer step — confirm with the trainer
+gc_collect_freq: 1000
+probe_freq: null
+steps: 88000  # NOTE(review): if tokens/step = 4 (batch_size) * 4096 (seq_len) * 8 (grad_acc_steps) * 8 (dp_replicate), this is ~92.3B tokens, not 100B — confirm
+data:  # dataset location, batching and tokenizer settings
+  root_dir: /scratch/craffel/lingua/data/fineinstructions/
+  sources:
+    ipt_fineinstructions_all_exp_chat: 1.0  # only source listed; value is presumably a sampling weight — confirm
+  batch_size: 4  # presumably sequences per rank per micro-batch — confirm
+  seq_len: 4096  # same value as model.max_seqlen
+  n_views: 2  # presumably input/target views for next-token prediction — confirm
+  seed: 42  # differs from the top-level seed (777)
+  add_bos: true
+  add_eos: true
+  load_async: true
+  prefetch_size: 1024
+  tokenizer:
+    name: tiktoken
+    path: /fsx/craffel/lingua/tokenizers/llama3.model
+    n_words: null  # unset here; model.vocab_size is given explicitly (128256)
+optim:  # optimizer and learning-rate schedule settings
+  lr: 0.001
+  weight_decay: 0.1
+  epsilon: 1.0e-08
+  beta1: 0.9
+  beta2: 0.95
+  clip: 1.0  # presumably the gradient-norm clipping threshold — confirm
+  scheduler: cosine
+  warmup: 2000  # presumably in optimizer steps (top-level steps is 88000) — confirm
+  lr_min_ratio: 1.0e-06  # if this scales lr, the floor would be 0.001 * 1e-6 = 1e-9 — confirm
+  cycle_length: 1.0
+  cosine_theta: 1.0
+  annealing_step: 1000  # NOTE(review): this and the two keys below look specific to non-cosine schedulers — confirm they are ignored here
+  decay_fraction: 0.1
+  exp_factor: 0.5
+model:  # transformer architecture settings
+  dim: 2048
+  n_layers: 25
+  head_dim: null  # unset; if derived as dim / n_heads it would be 2048 / 16 = 128 — confirm
+  n_heads: 16
+  n_kv_heads: null  # unset; presumably falls back to n_heads — confirm
+  ffn_dim_multiplier: null
+  multiple_of: 256
+  norm_eps: 1.0e-05
+  rope_theta: 10000.0
+  init_base_std: null
+  init_std_factor: disabled  # plain string value, not a YAML boolean
+  max_seqlen: 4096  # same value as data.seq_len
+  seed: 42  # differs from the top-level seed (777)
+  vocab_size: 128256
+  weight_tying: false
+  sliding_window: null
+distributed:  # parallelism, compilation and precision settings
+  dp_shard: 1
+  dp_replicate: 8  # same value as async_eval_gpus at the end of this config
+  tp_size: 1
+  selective_activation_checkpointing: false
+  compile: true
+  fsdp_type: full_shard  # NOTE(review): dp_shard is 1 — check whether any parameter sharding actually takes effect
+  model_dtype: bf16
+  float8_recipe: null
+  float8_filter: layers\.[0-9]+\.  # regex-like pattern, unquoted so backslashes are literal; presumably only used when float8_recipe is set — confirm
+  matmul_allow_tf32: false
+  detect_anomaly: false
+  compile_cache_size_limit: 8
+  spawn_method: forkserver
+env:  # presumably exported as environment variables for the training processes — confirm
+  MKL_SERVICE_FORCE_INTEL: GNU
+  OMP_NUM_THREADS: '1'  # quoted so the value stays a string rather than a YAML integer
+  MKL_NUM_THREADS: '1'
+  ENABLE_INTRA_NODE_COMM: '1'
+  TORCH_NCCL_AVOID_RECORD_STREAMS: '1'
+  NCCL_IB_TIMEOUT: '22'
+  NCCL_DEBUG: INFO  # NOTE(review): INFO-level NCCL logging can be noisy on a long run — confirm it is wanted
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: '1'
+checkpoint:  # checkpoint cadence, retention and location
+  dump:
+    every: 2000  # if counted in steps, 88000 / 2000 = 44 dump points over the run — confirm
+    keep: -1  # presumably -1 means keep every checkpoint — confirm
+  eval:
+    every: 2000  # same interval as dump.every
+    keep: -1
+  path: /fsx/craffel/fineinstructions/pretraining/ipt_fineinstructions_all_exp_chat_100b/checkpoints  # top-level dump_dir + "checkpoints"
+  init_ckpt_path: null  # no initial checkpoint path given
+  load_init_optimizer_state: false
+  save_init_ckpt: false
+profiling:  # profiler settings
+  run: true  # NOTE(review): profiling is enabled for this run — confirm that is intended
+  trace_folder: profiling  # relative path; presumably resolved under dump_dir — confirm
+  mem_warmup: 0
+  mem_steps: 4
+  profile_warmup: 100
+  profile_steps: 4
+logging:  # metric logging settings
+  freq: 1  # presumably log every step — confirm
+  acc_freq: null
+  wandb: null  # no wandb settings provided
+async_eval_gpus: 8  # same value as distributed.dp_replicate
+eval:  # evaluation harness and generator settings
+  harness:
+    apply_chat_template: true
+    tasks:  # four tasks listed
+    - hellaswag
+    - mmlu
+    - commonsense_qa
+    - sciq
+    confirm_run_unsafe_code: true  # NOTE(review): by its name this opts in to running unsafe code during eval — confirm a listed task needs it
+  generator:
+    max_tokens: 8192  # NOTE(review): twice model.max_seqlen (4096) — confirm this is a batch token budget, not a per-sequence length
+    dtype: bf16  # same value as distributed.model_dtype