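# BERT-style encoder with Monarch Mixer (M2) sequence mixing, configured for a 32768-token maximum sequence length.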
precision: amp_bf16
max_seq_len: 32768

tokenizer_name: bert-base-uncased
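
# ${tokenizer_name} references the top-level key above via variable interpolation (likely OmegaConf-style).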
model:
  name: bert
  pretrained_model_name: ${tokenizer_name}
  tokenizer_name: ${tokenizer_name}
  model_config:
    num_attention_heads: 12
    num_hidden_layers: 12
    attention_probs_dropout_prob: 0.0
    max_position_embeddings: 32768
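
    # Sequence mixing: Monarch Mixer long convolutions in place of self-attention.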
    monarch_mixer_sequence_mixing: True
    long_conv_l_max: 32768
    long_conv_kernel_learning_rate: 1e-3
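    # Hyena-style implicit parameterization of the long-convolution filters.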
    hyena_lr_pos_emb: 1e-5
    hyena_w: 10
    hyena_wd: 0.1
    hyena_emb_dim: 5
    hyena_filter_order: 128
    hyena_training_additions: False
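
    # Bidirectional (encoder-style) mixing, with a residual long-convolution branch.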
    bidirectional: true
    residual_long_conv: true
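
    # Feed-forward block: gated (GLU) MLP with block-diagonal Monarch projections.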
    use_glu_mlp: True
    use_monarch_mlp: True
    monarch_mlp_nblocks: 4
    use_positional_encodings: True