Commit 
							
							·
						
						aa5338e
	
1
								Parent(s):
							
							65bc7d3
								
Update model
Browse files
 - README.md +310 -3
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/17epoch.pth +3 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/config.yaml +235 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/accuracy.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/backward_time.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/class_loss.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/clip.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/forward_time.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/geo_loss_all.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/geo_loss_downstream.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/gpu_max_cached_mem_GB.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/grad_norm.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer32.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer36.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer40.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer44.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_mean.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/iter_time.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/loss.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/loss_scale.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/optim0_lr0.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/optim_step_time.png +0 -0
 - exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/train_time.png +0 -0
 - meta.yaml +8 -0
 
    	
        README.md
    CHANGED
    
    | 
         @@ -1,3 +1,310 @@ 
     | 
|
| 1 | 
         
            -
            ---
         
     | 
| 2 | 
         
            -
             
     | 
| 3 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            ---
         
     | 
| 2 | 
         
            +
            tags:
         
     | 
| 3 | 
         
            +
            - espnet
         
     | 
| 4 | 
         
            +
            - audio
         
     | 
| 5 | 
         
            +
            - language-identification
         
     | 
| 6 | 
         
            +
            language: multilingual
         
     | 
| 7 | 
         
            +
            datasets:
         
     | 
| 8 | 
         
            +
            - geolid
         
     | 
| 9 | 
         
            +
            license: cc-by-4.0
         
     | 
| 10 | 
         
            +
            ---
         
     | 
| 11 | 
         
            +
             
     | 
| 12 | 
         
            +
            ## ESPnet2 LID model
         
     | 
| 13 | 
         
            +
             
     | 
| 14 | 
         
            +
            ### `espnet/geolid_vl107only_shared_frozen`
         
     | 
| 15 | 
         
            +
             
     | 
| 16 | 
         
            +
            This model was trained by Qingzheng-Wang using the geolid recipe in [espnet](https://github.com/espnet/espnet/).
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
            ### Demo: How to use in ESPnet2
         
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
            Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
         
     | 
| 21 | 
         
            +
            if you haven't done that already.
         
     | 
| 22 | 
         
            +
             
     | 
| 23 | 
         
            +
            ```bash
         
     | 
| 24 | 
         
            +
            cd espnet
         
     | 
| 25 | 
         
            +
            git checkout 77e4293952083b9e32bc19a5ddc19efe45e70e4a
         
     | 
| 26 | 
         
            +
            pip install -e .
         
     | 
| 27 | 
         
            +
            cd egs2/geolid/lid1
         
     | 
| 28 | 
         
            +
            ./run.sh --skip_data_prep false --skip_train true --download_model espnet/geolid_vl107only_shared_frozen
         
     | 
| 29 | 
         
            +
            ```
         
     | 
| 30 | 
         
            +
             
     | 
| 31 | 
         
            +
             
     | 
| 32 | 
         
            +
             
     | 
| 33 | 
         
            +
            ## LID config
         
     | 
| 34 | 
         
            +
             
     | 
| 35 | 
         
            +
            <details><summary>expand</summary>
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            ```
         
     | 
| 38 | 
         
            +
            config: conf/voxlingua107_only/mms_ecapa_upcon_32_44_it0.4_shared_frozen.yaml
         
     | 
| 39 | 
         
            +
            print_config: false
         
     | 
| 40 | 
         
            +
            log_level: INFO
         
     | 
| 41 | 
         
            +
            drop_last_iter: false
         
     | 
| 42 | 
         
            +
            dry_run: false
         
     | 
| 43 | 
         
            +
            iterator_type: category
         
     | 
| 44 | 
         
            +
            valid_iterator_type: category
         
     | 
| 45 | 
         
            +
            output_dir: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw
         
     | 
| 46 | 
         
            +
            ngpu: 1
         
     | 
| 47 | 
         
            +
            seed: 3702
         
     | 
| 48 | 
         
            +
            num_workers: 8
         
     | 
| 49 | 
         
            +
            num_att_plot: 0
         
     | 
| 50 | 
         
            +
            dist_backend: nccl
         
     | 
| 51 | 
         
            +
            dist_init_method: env://
         
     | 
| 52 | 
         
            +
            dist_world_size: null
         
     | 
| 53 | 
         
            +
            dist_rank: null
         
     | 
| 54 | 
         
            +
            local_rank: 0
         
     | 
| 55 | 
         
            +
            dist_master_addr: null
         
     | 
| 56 | 
         
            +
            dist_master_port: null
         
     | 
| 57 | 
         
            +
            dist_launcher: null
         
     | 
| 58 | 
         
            +
            multiprocessing_distributed: false
         
     | 
| 59 | 
         
            +
            unused_parameters: true
         
     | 
| 60 | 
         
            +
            sharded_ddp: false
         
     | 
| 61 | 
         
            +
            use_deepspeed: false
         
     | 
| 62 | 
         
            +
            deepspeed_config: null
         
     | 
| 63 | 
         
            +
            gradient_as_bucket_view: true
         
     | 
| 64 | 
         
            +
            ddp_comm_hook: null
         
     | 
| 65 | 
         
            +
            cudnn_enabled: true
         
     | 
| 66 | 
         
            +
            cudnn_benchmark: true
         
     | 
| 67 | 
         
            +
            cudnn_deterministic: false
         
     | 
| 68 | 
         
            +
            use_tf32: false
         
     | 
| 69 | 
         
            +
            collect_stats: false
         
     | 
| 70 | 
         
            +
            write_collected_feats: false
         
     | 
| 71 | 
         
            +
            max_epoch: 30
         
     | 
| 72 | 
         
            +
            patience: null
         
     | 
| 73 | 
         
            +
            val_scheduler_criterion:
         
     | 
| 74 | 
         
            +
            - valid
         
     | 
| 75 | 
         
            +
            - loss
         
     | 
| 76 | 
         
            +
            early_stopping_criterion:
         
     | 
| 77 | 
         
            +
            - valid
         
     | 
| 78 | 
         
            +
            - loss
         
     | 
| 79 | 
         
            +
            - min
         
     | 
| 80 | 
         
            +
            best_model_criterion:
         
     | 
| 81 | 
         
            +
            -   - valid
         
     | 
| 82 | 
         
            +
                - accuracy
         
     | 
| 83 | 
         
            +
                - max
         
     | 
| 84 | 
         
            +
            keep_nbest_models: 2
         
     | 
| 85 | 
         
            +
            nbest_averaging_interval: 0
         
     | 
| 86 | 
         
            +
            grad_clip: 9999
         
     | 
| 87 | 
         
            +
            grad_clip_type: 2.0
         
     | 
| 88 | 
         
            +
            grad_noise: false
         
     | 
| 89 | 
         
            +
            accum_grad: 2
         
     | 
| 90 | 
         
            +
            no_forward_run: false
         
     | 
| 91 | 
         
            +
            resume: true
         
     | 
| 92 | 
         
            +
            train_dtype: float32
         
     | 
| 93 | 
         
            +
            use_amp: true
         
     | 
| 94 | 
         
            +
            log_interval: 100
         
     | 
| 95 | 
         
            +
            use_matplotlib: true
         
     | 
| 96 | 
         
            +
            use_tensorboard: true
         
     | 
| 97 | 
         
            +
            create_graph_in_tensorboard: false
         
     | 
| 98 | 
         
            +
            use_wandb: false
         
     | 
| 99 | 
         
            +
            wandb_project: null
         
     | 
| 100 | 
         
            +
            wandb_id: null
         
     | 
| 101 | 
         
            +
            wandb_entity: null
         
     | 
| 102 | 
         
            +
            wandb_name: null
         
     | 
| 103 | 
         
            +
            wandb_model_log_interval: -1
         
     | 
| 104 | 
         
            +
            detect_anomaly: false
         
     | 
| 105 | 
         
            +
            use_adapter: false
         
     | 
| 106 | 
         
            +
            adapter: lora
         
     | 
| 107 | 
         
            +
            save_strategy: all
         
     | 
| 108 | 
         
            +
            adapter_conf: {}
         
     | 
| 109 | 
         
            +
            pretrain_path: null
         
     | 
| 110 | 
         
            +
            init_param: []
         
     | 
| 111 | 
         
            +
            ignore_init_mismatch: false
         
     | 
| 112 | 
         
            +
            freeze_param: []
         
     | 
| 113 | 
         
            +
            num_iters_per_epoch: 1000
         
     | 
| 114 | 
         
            +
            batch_size: 20
         
     | 
| 115 | 
         
            +
            valid_batch_size: null
         
     | 
| 116 | 
         
            +
            batch_bins: 2880000
         
     | 
| 117 | 
         
            +
            valid_batch_bins: null
         
     | 
| 118 | 
         
            +
            category_sample_size: 10
         
     | 
| 119 | 
         
            +
            upsampling_factor: 0.5
         
     | 
| 120 | 
         
            +
            category_upsampling_factor: 0.5
         
     | 
| 121 | 
         
            +
            dataset_upsampling_factor: 0.5
         
     | 
| 122 | 
         
            +
            dataset_scaling_factor: 1.2
         
     | 
| 123 | 
         
            +
            max_batch_size: 16
         
     | 
| 124 | 
         
            +
            min_batch_size: 1
         
     | 
| 125 | 
         
            +
            train_shape_file:
         
     | 
| 126 | 
         
            +
            - exp_voxlingua107_only/lid_stats_16k/train/speech_shape
         
     | 
| 127 | 
         
            +
            valid_shape_file:
         
     | 
| 128 | 
         
            +
            - exp_voxlingua107_only/lid_stats_16k/valid/speech_shape
         
     | 
| 129 | 
         
            +
            batch_type: catpow
         
     | 
| 130 | 
         
            +
            language_upsampling_factor: 0.5
         
     | 
| 131 | 
         
            +
            valid_batch_type: null
         
     | 
| 132 | 
         
            +
            fold_length:
         
     | 
| 133 | 
         
            +
            - 120000
         
     | 
| 134 | 
         
            +
            sort_in_batch: descending
         
     | 
| 135 | 
         
            +
            shuffle_within_batch: false
         
     | 
| 136 | 
         
            +
            sort_batch: descending
         
     | 
| 137 | 
         
            +
            multiple_iterator: false
         
     | 
| 138 | 
         
            +
            chunk_length: 500
         
     | 
| 139 | 
         
            +
            chunk_shift_ratio: 0.5
         
     | 
| 140 | 
         
            +
            num_cache_chunks: 1024
         
     | 
| 141 | 
         
            +
            chunk_excluded_key_prefixes: []
         
     | 
| 142 | 
         
            +
            chunk_default_fs: null
         
     | 
| 143 | 
         
            +
            chunk_max_abs_length: null
         
     | 
| 144 | 
         
            +
            chunk_discard_short_samples: true
         
     | 
| 145 | 
         
            +
            train_data_path_and_name_and_type:
         
     | 
| 146 | 
         
            +
            -   - dump/raw/train_voxlingua107_lang/wav.scp
         
     | 
| 147 | 
         
            +
                - speech
         
     | 
| 148 | 
         
            +
                - sound
         
     | 
| 149 | 
         
            +
            -   - dump/raw/train_voxlingua107_lang/utt2lang
         
     | 
| 150 | 
         
            +
                - lid_labels
         
     | 
| 151 | 
         
            +
                - text
         
     | 
| 152 | 
         
            +
            valid_data_path_and_name_and_type:
         
     | 
| 153 | 
         
            +
            -   - dump/raw/dev_voxlingua107_lang/wav.scp
         
     | 
| 154 | 
         
            +
                - speech
         
     | 
| 155 | 
         
            +
                - sound
         
     | 
| 156 | 
         
            +
            -   - dump/raw/dev_voxlingua107_lang/utt2lang
         
     | 
| 157 | 
         
            +
                - lid_labels
         
     | 
| 158 | 
         
            +
                - text
         
     | 
| 159 | 
         
            +
            multi_task_dataset: false
         
     | 
| 160 | 
         
            +
            allow_variable_data_keys: false
         
     | 
| 161 | 
         
            +
            max_cache_size: 0.0
         
     | 
| 162 | 
         
            +
            max_cache_fd: 32
         
     | 
| 163 | 
         
            +
            allow_multi_rates: false
         
     | 
| 164 | 
         
            +
            valid_max_cache_size: null
         
     | 
| 165 | 
         
            +
            exclude_weight_decay: false
         
     | 
| 166 | 
         
            +
            exclude_weight_decay_conf: {}
         
     | 
| 167 | 
         
            +
            optim: adam
         
     | 
| 168 | 
         
            +
            optim_conf:
         
     | 
| 169 | 
         
            +
                lr: 5.0e-06
         
     | 
| 170 | 
         
            +
                betas:
         
     | 
| 171 | 
         
            +
                - 0.9
         
     | 
| 172 | 
         
            +
                - 0.98
         
     | 
| 173 | 
         
            +
            scheduler: tristagelr
         
     | 
| 174 | 
         
            +
            scheduler_conf:
         
     | 
| 175 | 
         
            +
                max_steps: 30000
         
     | 
| 176 | 
         
            +
                warmup_ratio: 0.3
         
     | 
| 177 | 
         
            +
                hold_ratio: 0.2
         
     | 
| 178 | 
         
            +
                decay_ratio: 0.5
         
     | 
| 179 | 
         
            +
                init_lr_scale: 0.6
         
     | 
| 180 | 
         
            +
                final_lr_scale: 0.1
         
     | 
| 181 | 
         
            +
            init: null
         
     | 
| 182 | 
         
            +
            use_preprocessor: true
         
     | 
| 183 | 
         
            +
            input_size: null
         
     | 
| 184 | 
         
            +
            target_duration: 3.0
         
     | 
| 185 | 
         
            +
            lang2utt: dump/raw/train_voxlingua107_lang/lang2utt
         
     | 
| 186 | 
         
            +
            lang_num: 107
         
     | 
| 187 | 
         
            +
            sample_rate: 16000
         
     | 
| 188 | 
         
            +
            num_eval: 10
         
     | 
| 189 | 
         
            +
            rir_scp: ''
         
     | 
| 190 | 
         
            +
            model: upstream_condition
         
     | 
| 191 | 
         
            +
            model_conf:
         
     | 
| 192 | 
         
            +
                lang2vec_conditioning_layers:
         
     | 
| 193 | 
         
            +
                - 32
         
     | 
| 194 | 
         
            +
                - 36
         
     | 
| 195 | 
         
            +
                - 40
         
     | 
| 196 | 
         
            +
                - 44
         
     | 
| 197 | 
         
            +
                apply_intermediate_lang2vec_loss: true
         
     | 
| 198 | 
         
            +
                apply_intermediate_lang2vec_condition: true
         
     | 
| 199 | 
         
            +
                inter_lang2vec_loss_weight: 0.4
         
     | 
| 200 | 
         
            +
                cutoff_gradient_from_backbone: true
         
     | 
| 201 | 
         
            +
                cutoff_gradient_before_condproj: true
         
     | 
| 202 | 
         
            +
                shared_conditioning_proj: true
         
     | 
| 203 | 
         
            +
            frontend: s3prl_condition
         
     | 
| 204 | 
         
            +
            frontend_conf:
         
     | 
| 205 | 
         
            +
                frontend_conf:
         
     | 
| 206 | 
         
            +
                    upstream: hf_wav2vec2_condition
         
     | 
| 207 | 
         
            +
                    path_or_url: facebook/mms-1b
         
     | 
| 208 | 
         
            +
                download_dir: ./hub
         
     | 
| 209 | 
         
            +
                multilayer_feature: true
         
     | 
| 210 | 
         
            +
            specaug: null
         
     | 
| 211 | 
         
            +
            specaug_conf: {}
         
     | 
| 212 | 
         
            +
            normalize: utterance_mvn
         
     | 
| 213 | 
         
            +
            normalize_conf:
         
     | 
| 214 | 
         
            +
                norm_vars: false
         
     | 
| 215 | 
         
            +
            encoder: ecapa_tdnn
         
     | 
| 216 | 
         
            +
            encoder_conf:
         
     | 
| 217 | 
         
            +
                model_scale: 8
         
     | 
| 218 | 
         
            +
                ndim: 512
         
     | 
| 219 | 
         
            +
                output_size: 1536
         
     | 
| 220 | 
         
            +
            pooling: chn_attn_stat
         
     | 
| 221 | 
         
            +
            pooling_conf: {}
         
     | 
| 222 | 
         
            +
            projector: rawnet3
         
     | 
| 223 | 
         
            +
            projector_conf:
         
     | 
| 224 | 
         
            +
                output_size: 192
         
     | 
| 225 | 
         
            +
            encoder_condition: identity
         
     | 
| 226 | 
         
            +
            encoder_condition_conf: {}
         
     | 
| 227 | 
         
            +
            pooling_condition: chn_attn_stat
         
     | 
| 228 | 
         
            +
            pooling_condition_conf: {}
         
     | 
| 229 | 
         
            +
            projector_condition: rawnet3
         
     | 
| 230 | 
         
            +
            projector_condition_conf: {}
         
     | 
| 231 | 
         
            +
            preprocessor: lid
         
     | 
| 232 | 
         
            +
            preprocessor_conf:
         
     | 
| 233 | 
         
            +
                fix_duration: false
         
     | 
| 234 | 
         
            +
                sample_rate: 16000
         
     | 
| 235 | 
         
            +
                noise_apply_prob: 0.0
         
     | 
| 236 | 
         
            +
                noise_info:
         
     | 
| 237 | 
         
            +
                -   - 1.0
         
     | 
| 238 | 
         
            +
                    - dump/raw/musan_speech.scp
         
     | 
| 239 | 
         
            +
                    -   - 4
         
     | 
| 240 | 
         
            +
                        - 7
         
     | 
| 241 | 
         
            +
                    -   - 13
         
     | 
| 242 | 
         
            +
                        - 20
         
     | 
| 243 | 
         
            +
                -   - 1.0
         
     | 
| 244 | 
         
            +
                    - dump/raw/musan_noise.scp
         
     | 
| 245 | 
         
            +
                    -   - 1
         
     | 
| 246 | 
         
            +
                        - 1
         
     | 
| 247 | 
         
            +
                    -   - 0
         
     | 
| 248 | 
         
            +
                        - 15
         
     | 
| 249 | 
         
            +
                -   - 1.0
         
     | 
| 250 | 
         
            +
                    - dump/raw/musan_music.scp
         
     | 
| 251 | 
         
            +
                    -   - 1
         
     | 
| 252 | 
         
            +
                        - 1
         
     | 
| 253 | 
         
            +
                    -   - 5
         
     | 
| 254 | 
         
            +
                        - 15
         
     | 
| 255 | 
         
            +
                rir_apply_prob: 0.0
         
     | 
| 256 | 
         
            +
                rir_scp: dump/raw/rirs.scp
         
     | 
| 257 | 
         
            +
                use_lang2vec: true
         
     | 
| 258 | 
         
            +
                lang2vec_type: geo
         
     | 
| 259 | 
         
            +
            loss: aamsoftmax_sc_topk_lang2vec
         
     | 
| 260 | 
         
            +
            loss_conf:
         
     | 
| 261 | 
         
            +
                margin: 0.5
         
     | 
| 262 | 
         
            +
                scale: 30
         
     | 
| 263 | 
         
            +
                K: 3
         
     | 
| 264 | 
         
            +
                mp: 0.06
         
     | 
| 265 | 
         
            +
                k_top: 5
         
     | 
| 266 | 
         
            +
                lang2vec_dim: 299
         
     | 
| 267 | 
         
            +
                lang2vec_type: geo
         
     | 
| 268 | 
         
            +
                lang2vec_weight: 0.2
         
     | 
| 269 | 
         
            +
            required:
         
     | 
| 270 | 
         
            +
            - output_dir
         
     | 
| 271 | 
         
            +
            version: '202506'
         
     | 
| 272 | 
         
            +
            distributed: false
         
     | 
| 273 | 
         
            +
            ```
         
     | 
| 274 | 
         
            +
             
     | 
| 275 | 
         
            +
            </details>
         
     | 
| 276 | 
         
            +
             
     | 
| 277 | 
         
            +
             
     | 
| 278 | 
         
            +
             
     | 
| 279 | 
         
            +
            ### Citing ESPnet
         
     | 
| 280 | 
         
            +
             
     | 
| 281 | 
         
            +
            ```bibtex
         
     | 
| 282 | 
         
            +
            @inproceedings{watanabe2018espnet,
         
     | 
| 283 | 
         
            +
              author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
         
     | 
| 284 | 
         
            +
              title={{ESPnet}: End-to-End Speech Processing Toolkit},
         
     | 
| 285 | 
         
            +
              year={2018},
         
     | 
| 286 | 
         
            +
              booktitle={Proceedings of Interspeech},
         
     | 
| 287 | 
         
            +
              pages={2207--2211},
         
     | 
| 288 | 
         
            +
              doi={10.21437/Interspeech.2018-1456},
         
     | 
| 289 | 
         
            +
              url={https://doi.org/10.21437/Interspeech.2018-1456}
         
     | 
| 290 | 
         
            +
            }
         
     | 
| 291 | 
         
            +
             
     | 
| 292 | 
         
            +
             
     | 
| 293 | 
         
            +
             
     | 
| 294 | 
         
            +
             
     | 
| 295 | 
         
            +
             
     | 
| 296 | 
         
            +
             
     | 
| 297 | 
         
            +
            ```
         
     | 
| 298 | 
         
            +
             
     | 
| 299 | 
         
            +
            or arXiv:
         
     | 
| 300 | 
         
            +
             
     | 
| 301 | 
         
            +
            ```bibtex
         
     | 
| 302 | 
         
            +
            @misc{watanabe2018espnet,
         
     | 
| 303 | 
         
            +
              title={{ESPnet}: End-to-End Speech Processing Toolkit},
         
     | 
| 304 | 
         
            +
              author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
         
     | 
| 305 | 
         
            +
              year={2018},
         
     | 
| 306 | 
         
            +
              eprint={1804.00015},
         
     | 
| 307 | 
         
            +
              archivePrefix={arXiv},
         
     | 
| 308 | 
         
            +
              primaryClass={cs.CL}
         
     | 
| 309 | 
         
            +
            }
         
     | 
| 310 | 
         
            +
            ```
         
     | 
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/17epoch.pth
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:ecd0ae1007eb2bb3e2f550b749c68763d9f501fb65e637dc8a2ddd0ad8d8e11d
         
     | 
| 3 | 
         
            +
            size 3909077285
         
     | 
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/config.yaml
    ADDED
    
    | 
         @@ -0,0 +1,235 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            config: conf/voxlingua107_only/mms_ecapa_upcon_32_44_it0.4_shared_frozen.yaml
         
     | 
| 2 | 
         
            +
            print_config: false
         
     | 
| 3 | 
         
            +
            log_level: INFO
         
     | 
| 4 | 
         
            +
            drop_last_iter: false
         
     | 
| 5 | 
         
            +
            dry_run: false
         
     | 
| 6 | 
         
            +
            iterator_type: category
         
     | 
| 7 | 
         
            +
            valid_iterator_type: category
         
     | 
| 8 | 
         
            +
            output_dir: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw
         
     | 
| 9 | 
         
            +
            ngpu: 1
         
     | 
| 10 | 
         
            +
            seed: 3702
         
     | 
| 11 | 
         
            +
            num_workers: 8
         
     | 
| 12 | 
         
            +
            num_att_plot: 0
         
     | 
| 13 | 
         
            +
            dist_backend: nccl
         
     | 
| 14 | 
         
            +
            dist_init_method: env://
         
     | 
| 15 | 
         
            +
            dist_world_size: null
         
     | 
| 16 | 
         
            +
            dist_rank: null
         
     | 
| 17 | 
         
            +
            local_rank: 0
         
     | 
| 18 | 
         
            +
            dist_master_addr: null
         
     | 
| 19 | 
         
            +
            dist_master_port: null
         
     | 
| 20 | 
         
            +
            dist_launcher: null
         
     | 
| 21 | 
         
            +
            multiprocessing_distributed: false
         
     | 
| 22 | 
         
            +
            unused_parameters: true
         
     | 
| 23 | 
         
            +
            sharded_ddp: false
         
     | 
| 24 | 
         
            +
            use_deepspeed: false
         
     | 
| 25 | 
         
            +
            deepspeed_config: null
         
     | 
| 26 | 
         
            +
            gradient_as_bucket_view: true
         
     | 
| 27 | 
         
            +
            ddp_comm_hook: null
         
     | 
| 28 | 
         
            +
            cudnn_enabled: true
         
     | 
| 29 | 
         
            +
            cudnn_benchmark: true
         
     | 
| 30 | 
         
            +
            cudnn_deterministic: false
         
     | 
| 31 | 
         
            +
            use_tf32: false
         
     | 
| 32 | 
         
            +
            collect_stats: false
         
     | 
| 33 | 
         
            +
            write_collected_feats: false
         
     | 
| 34 | 
         
            +
            max_epoch: 30
         
     | 
| 35 | 
         
            +
            patience: null
         
     | 
| 36 | 
         
            +
            val_scheduler_criterion:
         
     | 
| 37 | 
         
            +
            - valid
         
     | 
| 38 | 
         
            +
            - loss
         
     | 
| 39 | 
         
            +
            early_stopping_criterion:
         
     | 
| 40 | 
         
            +
            - valid
         
     | 
| 41 | 
         
            +
            - loss
         
     | 
| 42 | 
         
            +
            - min
         
     | 
| 43 | 
         
            +
            best_model_criterion:
         
     | 
| 44 | 
         
            +
            -   - valid
         
     | 
| 45 | 
         
            +
                - accuracy
         
     | 
| 46 | 
         
            +
                - max
         
     | 
| 47 | 
         
            +
            keep_nbest_models: 2
         
     | 
| 48 | 
         
            +
            nbest_averaging_interval: 0
         
     | 
| 49 | 
         
            +
            grad_clip: 9999
         
     | 
| 50 | 
         
            +
            grad_clip_type: 2.0
         
     | 
| 51 | 
         
            +
            grad_noise: false
         
     | 
| 52 | 
         
            +
            accum_grad: 2
         
     | 
| 53 | 
         
            +
            no_forward_run: false
         
     | 
| 54 | 
         
            +
            resume: true
         
     | 
| 55 | 
         
            +
            train_dtype: float32
         
     | 
| 56 | 
         
            +
            use_amp: true
         
     | 
| 57 | 
         
            +
            log_interval: 100
         
     | 
| 58 | 
         
            +
            use_matplotlib: true
         
     | 
| 59 | 
         
            +
            use_tensorboard: true
         
     | 
| 60 | 
         
            +
            create_graph_in_tensorboard: false
         
     | 
| 61 | 
         
            +
            use_wandb: false
         
     | 
| 62 | 
         
            +
            wandb_project: null
         
     | 
| 63 | 
         
            +
            wandb_id: null
         
     | 
| 64 | 
         
            +
            wandb_entity: null
         
     | 
| 65 | 
         
            +
            wandb_name: null
         
     | 
| 66 | 
         
            +
            wandb_model_log_interval: -1
         
     | 
| 67 | 
         
            +
            detect_anomaly: false
         
     | 
| 68 | 
         
            +
            use_adapter: false
         
     | 
| 69 | 
         
            +
            adapter: lora
         
     | 
| 70 | 
         
            +
            save_strategy: all
         
     | 
| 71 | 
         
            +
            adapter_conf: {}
         
     | 
| 72 | 
         
            +
            pretrain_path: null
         
     | 
| 73 | 
         
            +
            init_param: []
         
     | 
| 74 | 
         
            +
            ignore_init_mismatch: false
         
     | 
| 75 | 
         
            +
            freeze_param: []
         
     | 
| 76 | 
         
            +
            num_iters_per_epoch: 1000
         
     | 
| 77 | 
         
            +
            batch_size: 20
         
     | 
| 78 | 
         
            +
            valid_batch_size: null
         
     | 
| 79 | 
         
            +
            batch_bins: 2880000
         
     | 
| 80 | 
         
            +
            valid_batch_bins: null
         
     | 
| 81 | 
         
            +
            category_sample_size: 10
         
     | 
| 82 | 
         
            +
            upsampling_factor: 0.5
         
     | 
| 83 | 
         
            +
            category_upsampling_factor: 0.5
         
     | 
| 84 | 
         
            +
            dataset_upsampling_factor: 0.5
         
     | 
| 85 | 
         
            +
            dataset_scaling_factor: 1.2
         
     | 
| 86 | 
         
            +
            max_batch_size: 16
         
     | 
| 87 | 
         
            +
            min_batch_size: 1
         
     | 
| 88 | 
         
            +
            train_shape_file:
         
     | 
| 89 | 
         
            +
            - exp_voxlingua107_only/lid_stats_16k/train/speech_shape
         
     | 
| 90 | 
         
            +
            valid_shape_file:
         
     | 
| 91 | 
         
            +
            - exp_voxlingua107_only/lid_stats_16k/valid/speech_shape
         
     | 
| 92 | 
         
            +
            batch_type: catpow
         
     | 
| 93 | 
         
            +
            language_upsampling_factor: 0.5
         
     | 
| 94 | 
         
            +
            valid_batch_type: null
         
     | 
| 95 | 
         
            +
            fold_length:
         
     | 
| 96 | 
         
            +
            - 120000
         
     | 
| 97 | 
         
            +
            sort_in_batch: descending
         
     | 
| 98 | 
         
            +
            shuffle_within_batch: false
         
     | 
| 99 | 
         
            +
            sort_batch: descending
         
     | 
| 100 | 
         
            +
            multiple_iterator: false
         
     | 
| 101 | 
         
            +
            chunk_length: 500
         
     | 
| 102 | 
         
            +
            chunk_shift_ratio: 0.5
         
     | 
| 103 | 
         
            +
            num_cache_chunks: 1024
         
     | 
| 104 | 
         
            +
            chunk_excluded_key_prefixes: []
         
     | 
| 105 | 
         
            +
            chunk_default_fs: null
         
     | 
| 106 | 
         
            +
            chunk_max_abs_length: null
         
     | 
| 107 | 
         
            +
            chunk_discard_short_samples: true
         
     | 
| 108 | 
         
            +
            train_data_path_and_name_and_type:
         
     | 
| 109 | 
         
            +
            -   - dump/raw/train_voxlingua107_lang/wav.scp
         
     | 
| 110 | 
         
            +
                - speech
         
     | 
| 111 | 
         
            +
                - sound
         
     | 
| 112 | 
         
            +
            -   - dump/raw/train_voxlingua107_lang/utt2lang
         
     | 
| 113 | 
         
            +
                - lid_labels
         
     | 
| 114 | 
         
            +
                - text
         
     | 
| 115 | 
         
            +
            valid_data_path_and_name_and_type:
         
     | 
| 116 | 
         
            +
            -   - dump/raw/dev_voxlingua107_lang/wav.scp
         
     | 
| 117 | 
         
            +
                - speech
         
     | 
| 118 | 
         
            +
                - sound
         
     | 
| 119 | 
         
            +
            -   - dump/raw/dev_voxlingua107_lang/utt2lang
         
     | 
| 120 | 
         
            +
                - lid_labels
         
     | 
| 121 | 
         
            +
                - text
         
     | 
| 122 | 
         
            +
            multi_task_dataset: false
         
     | 
| 123 | 
         
            +
            allow_variable_data_keys: false
         
     | 
| 124 | 
         
            +
            max_cache_size: 0.0
         
     | 
| 125 | 
         
            +
            max_cache_fd: 32
         
     | 
| 126 | 
         
            +
            allow_multi_rates: false
         
     | 
| 127 | 
         
            +
            valid_max_cache_size: null
         
     | 
| 128 | 
         
            +
            exclude_weight_decay: false
         
     | 
| 129 | 
         
            +
            exclude_weight_decay_conf: {}
         
     | 
| 130 | 
         
            +
            optim: adam
         
     | 
| 131 | 
         
            +
            optim_conf:
         
     | 
| 132 | 
         
            +
                lr: 5.0e-06
         
     | 
| 133 | 
         
            +
                betas:
         
     | 
| 134 | 
         
            +
                - 0.9
         
     | 
| 135 | 
         
            +
                - 0.98
         
     | 
| 136 | 
         
            +
            scheduler: tristagelr
         
     | 
| 137 | 
         
            +
            scheduler_conf:
         
     | 
| 138 | 
         
            +
                max_steps: 30000
         
     | 
| 139 | 
         
            +
                warmup_ratio: 0.3
         
     | 
| 140 | 
         
            +
                hold_ratio: 0.2
         
     | 
| 141 | 
         
            +
                decay_ratio: 0.5
         
     | 
| 142 | 
         
            +
                init_lr_scale: 0.6
         
     | 
| 143 | 
         
            +
                final_lr_scale: 0.1
         
     | 
| 144 | 
         
            +
            init: null
         
     | 
| 145 | 
         
            +
            use_preprocessor: true
         
     | 
| 146 | 
         
            +
            input_size: null
         
     | 
| 147 | 
         
            +
            target_duration: 3.0
         
     | 
| 148 | 
         
            +
            lang2utt: dump/raw/train_voxlingua107_lang/lang2utt
         
     | 
| 149 | 
         
            +
            lang_num: 107
         
     | 
| 150 | 
         
            +
            sample_rate: 16000
         
     | 
| 151 | 
         
            +
            num_eval: 10
         
     | 
| 152 | 
         
            +
            rir_scp: ''
         
     | 
| 153 | 
         
            +
            model: upstream_condition
         
     | 
| 154 | 
         
            +
            model_conf:
         
     | 
| 155 | 
         
            +
                lang2vec_conditioning_layers:
         
     | 
| 156 | 
         
            +
                - 32
         
     | 
| 157 | 
         
            +
                - 36
         
     | 
| 158 | 
         
            +
                - 40
         
     | 
| 159 | 
         
            +
                - 44
         
     | 
| 160 | 
         
            +
                apply_intermediate_lang2vec_loss: true
         
     | 
| 161 | 
         
            +
                apply_intermediate_lang2vec_condition: true
         
     | 
| 162 | 
         
            +
                inter_lang2vec_loss_weight: 0.4
         
     | 
| 163 | 
         
            +
                cutoff_gradient_from_backbone: true
         
     | 
| 164 | 
         
            +
                cutoff_gradient_before_condproj: true
         
     | 
| 165 | 
         
            +
                shared_conditioning_proj: true
         
     | 
| 166 | 
         
            +
            frontend: s3prl_condition
         
     | 
| 167 | 
         
            +
            frontend_conf:
         
     | 
| 168 | 
         
            +
                frontend_conf:
         
     | 
| 169 | 
         
            +
                    upstream: hf_wav2vec2_condition
         
     | 
| 170 | 
         
            +
                    path_or_url: facebook/mms-1b
         
     | 
| 171 | 
         
            +
                download_dir: ./hub
         
     | 
| 172 | 
         
            +
                multilayer_feature: true
         
     | 
| 173 | 
         
            +
            specaug: null
         
     | 
| 174 | 
         
            +
            specaug_conf: {}
         
     | 
| 175 | 
         
            +
            normalize: utterance_mvn
         
     | 
| 176 | 
         
            +
            normalize_conf:
         
     | 
| 177 | 
         
            +
                norm_vars: false
         
     | 
| 178 | 
         
            +
            encoder: ecapa_tdnn
         
     | 
| 179 | 
         
            +
            encoder_conf:
         
     | 
| 180 | 
         
            +
                model_scale: 8
         
     | 
| 181 | 
         
            +
                ndim: 512
         
     | 
| 182 | 
         
            +
                output_size: 1536
         
     | 
| 183 | 
         
            +
            pooling: chn_attn_stat
         
     | 
| 184 | 
         
            +
            pooling_conf: {}
         
     | 
| 185 | 
         
            +
            projector: rawnet3
         
     | 
| 186 | 
         
            +
            projector_conf:
         
     | 
| 187 | 
         
            +
                output_size: 192
         
     | 
| 188 | 
         
            +
            encoder_condition: identity
         
     | 
| 189 | 
         
            +
            encoder_condition_conf: {}
         
     | 
| 190 | 
         
            +
            pooling_condition: chn_attn_stat
         
     | 
| 191 | 
         
            +
            pooling_condition_conf: {}
         
     | 
| 192 | 
         
            +
            projector_condition: rawnet3
         
     | 
| 193 | 
         
            +
            projector_condition_conf: {}
         
     | 
| 194 | 
         
            +
            preprocessor: lid
         
     | 
| 195 | 
         
            +
            preprocessor_conf:
         
     | 
| 196 | 
         
            +
                fix_duration: false
         
     | 
| 197 | 
         
            +
                sample_rate: 16000
         
     | 
| 198 | 
         
            +
                noise_apply_prob: 0.0
         
     | 
| 199 | 
         
            +
                noise_info:
         
     | 
| 200 | 
         
            +
                -   - 1.0
         
     | 
| 201 | 
         
            +
                    - dump/raw/musan_speech.scp
         
     | 
| 202 | 
         
            +
                    -   - 4
         
     | 
| 203 | 
         
            +
                        - 7
         
     | 
| 204 | 
         
            +
                    -   - 13
         
     | 
| 205 | 
         
            +
                        - 20
         
     | 
| 206 | 
         
            +
                -   - 1.0
         
     | 
| 207 | 
         
            +
                    - dump/raw/musan_noise.scp
         
     | 
| 208 | 
         
            +
                    -   - 1
         
     | 
| 209 | 
         
            +
                        - 1
         
     | 
| 210 | 
         
            +
                    -   - 0
         
     | 
| 211 | 
         
            +
                        - 15
         
     | 
| 212 | 
         
            +
                -   - 1.0
         
     | 
| 213 | 
         
            +
                    - dump/raw/musan_music.scp
         
     | 
| 214 | 
         
            +
                    -   - 1
         
     | 
| 215 | 
         
            +
                        - 1
         
     | 
| 216 | 
         
            +
                    -   - 5
         
     | 
| 217 | 
         
            +
                        - 15
         
     | 
| 218 | 
         
            +
                rir_apply_prob: 0.0
         
     | 
| 219 | 
         
            +
                rir_scp: dump/raw/rirs.scp
         
     | 
| 220 | 
         
            +
                use_lang2vec: true
         
     | 
| 221 | 
         
            +
                lang2vec_type: geo
         
     | 
| 222 | 
         
            +
            loss: aamsoftmax_sc_topk_lang2vec
         
     | 
| 223 | 
         
            +
            loss_conf:
         
     | 
| 224 | 
         
            +
                margin: 0.5
         
     | 
| 225 | 
         
            +
                scale: 30
         
     | 
| 226 | 
         
            +
                K: 3
         
     | 
| 227 | 
         
            +
                mp: 0.06
         
     | 
| 228 | 
         
            +
                k_top: 5
         
     | 
| 229 | 
         
            +
                lang2vec_dim: 299
         
     | 
| 230 | 
         
            +
                lang2vec_type: geo
         
     | 
| 231 | 
         
            +
                lang2vec_weight: 0.2
         
     | 
| 232 | 
         
            +
            required:
         
     | 
| 233 | 
         
            +
            - output_dir
         
     | 
| 234 | 
         
            +
            version: '202506'
         
     | 
| 235 | 
         
            +
            distributed: false
         
     | 
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/accuracy.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/backward_time.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/class_loss.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/clip.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/forward_time.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/geo_loss_all.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/geo_loss_downstream.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/gpu_max_cached_mem_GB.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/grad_norm.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer32.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer36.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer40.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_layer44.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/inter_geo_loss_mean.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/iter_time.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/loss.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/loss_scale.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/optim0_lr0.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/optim_step_time.png
    ADDED
    
    
											 
									 | 
									
								
    	
        exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/images/train_time.png
    ADDED
    
    
											 
									 | 
									
								
    	
        meta.yaml
    ADDED
    
    | 
         @@ -0,0 +1,8 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            espnet: '202506'
         
     | 
| 2 | 
         
            +
            files:
         
     | 
| 3 | 
         
            +
              model_file: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/17epoch.pth
         
     | 
| 4 | 
         
            +
            python: 3.11.8 | packaged by conda-forge | (main, Feb 16 2024, 20:53:32) [GCC 12.3.0]
         
     | 
| 5 | 
         
            +
            timestamp: 1755582232.857947
         
     | 
| 6 | 
         
            +
            torch: 2.4.0+cu118
         
     | 
| 7 | 
         
            +
            yaml_files:
         
     | 
| 8 | 
         
            +
              train_config: exp_voxlingua107_only/lid_mms_ecapa_upcon_32_44_it0.4_shared_frozen_raw/config.yaml
         
     |