---
# MeMDLM v2 guidance-classifier training configuration.
# NOTE(review): the ${...} references below look like OmegaConf/Hydra
# interpolation — confirm the loader before relying on them.

seed: 42
base_dir: /scratch/sgoel/MeMDLM_v2


# Pretrained and fine-tuned language-model identifiers (HF hub ids and
# local fine-tune run names).
lm:
  pretrained_esm: facebook/esm2_t33_650M_UR50D
  pretrained_evoflow: fredzzp/EvoFlow-650M-context-3070
  pretrained_dplm: airkingbd/dplm_650m
  ft_evoflow: ft_eflow-3070-650M_steps=50k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=8_gclip=1.0
  ft_dplm: ft_dplm-650M_steps=5k_layers=3_lr=0.00004_wd=.01_polynom_pwr=1_betas=.9-.98_bsz=32_gclip=1.0

# Classifier-head architecture.
model:
  d_model: 1280  # presumably matches the 650M ESM2 hidden size — confirm
  num_heads: 2
  dropout: 0.5
  # NOTE(review): training.n_layers below is also 4 — verify which key the
  # model-construction code actually reads.
  num_layers: 4
  label_pad_value: -100  # ignore-index for padded positions in the loss

# Optimizer and LR-schedule hyperparameters.
optim:
  type: adamw
  # Mantissa dot is required so YAML 1.1 loaders (e.g. PyYAML) resolve a
  # float; bare "3e-5" is parsed as the *string* "3e-5" under YAML 1.1.
  lr: 3.0e-5
  lr_end: 1.0e-5
  weight_decay: 0.01
  beta1: 0.9
  beta2: 0.98
  power: 1  # presumably polynomial-decay power (1 = linear) — confirm



# Trainer settings (PyTorch-Lightning-style field names — confirm consumer).
training:
  mode: test
  n_layers: 4  # NOTE(review): duplicates model.num_layers — verify
  max_steps: 3000
  warmup_steps: 150
  log_every_n_steps: 10
  num_sanity_val_steps: 2
  val_check_interval: 250
  enable_progress_bar: true
  grad_clip_val: 1.0
  devices: [0]  # flow style kept for a tiny leaf list

# Guided-sampling hyperparameters.
guidance:
  n_steps: 128
  alpha: 3
  gamma: 0.3
  # Mantissa dot so YAML 1.1 loaders resolve a float (bare "1e-4" would be a
  # string under PyYAML).
  saliency_eps: 1.0e-4
  saliency_t: 2.0
  sampling_t: 0.7
  boltzmann_t: 0.3
  top_p: 0.2
  # NOTE(review): `steps` appears redundant with `n_steps` above (both 128) —
  # confirm which key the sampler reads before removing either.
  steps: 128
  prior: lm_probs

# Dataset splits and loader settings; paths resolve via ${base_dir}.
data:
  batch_size: 32
  max_seq_len: 1024
  train: ${base_dir}/data/classifier/train.csv
  test: ${base_dir}/data/classifier/test.csv
  val: ${base_dir}/data/classifier/val.csv


# Weights & Biases logging.
wandb:
  project: memdlm_guidance
  group: programmablebio
  name: new_data_cleaned_steps3k_lr3e-5_bsz32_heads2_drpt0.5_layers4
  # ${.name} is a relative (same-section) interpolation in OmegaConf —
  # run id becomes "<name>_<seed>". Confirm loader supports it.
  id: ${.name}_${seed}


# Checkpoint locations derive from the W&B run name so runs do not collide.
checkpointing:
  save_every_n_steps: 250
  save_dir: ${base_dir}/checkpoints/${wandb.name}
  resume_ckpt_path: ${checkpointing.save_dir}/last.ckpt
  best_ckpt_path: ${checkpointing.save_dir}/best_model.ckpt