piotrzelasko committed on
Commit
eb793b7
·
verified ·
1 Parent(s): aac6ac8

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ results_hf/open-asr-leaderboarddatasets-test-only-spgispeech-test.jsonl filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,11 @@
1
- ---
2
- license: cc-by-4.0
3
- ---
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: NeMo
3
+ tags:
4
+ - model_hub_mixin
5
+ - pytorch_model_hub_mixin
6
+ ---
7
+
8
+ This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
9
+ - Code: https://github.com/NVIDIA/NeMo
10
+ - Paper: [More Information Needed]
11
+ - Docs: https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit
config.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_locator_tag": "<|audioplaceholder|>",
3
+ "freeze_params": [
4
+ "^llm\\..+$",
5
+ "^embed_tokens\\..+$"
6
+ ],
7
+ "lora": {
8
+ "lora_alpha": 256,
9
+ "lora_dropout": 0.01,
10
+ "r": 128,
11
+ "target_modules": [
12
+ "q_proj",
13
+ "v_proj"
14
+ ],
15
+ "task_type": "CAUSAL_LM"
16
+ },
17
+ "lr_scheduler": {
18
+ "_target_": "nemo.core.optim.lr_scheduler.CosineAnnealing",
19
+ "max_steps": 100000,
20
+ "min_lr": 1e-06,
21
+ "warmup_steps": 1000
22
+ },
23
+ "optimizer": {
24
+ "_target_": "torch.optim.AdamW",
25
+ "betas": [
26
+ 0.9,
27
+ 0.98
28
+ ],
29
+ "foreach": true,
30
+ "lr": 0.0005,
31
+ "weight_decay": 0.001
32
+ },
33
+ "perception": {
34
+ "encoder": {
35
+ "_target_": "nemo.collections.asr.modules.ConformerEncoder",
36
+ "att_context_size": [
37
+ -1,
38
+ -1
39
+ ],
40
+ "causal_downsampling": false,
41
+ "conv_context_size": null,
42
+ "conv_kernel_size": 9,
43
+ "conv_norm_type": "batch_norm",
44
+ "d_model": 1024,
45
+ "dropout": 0.1,
46
+ "dropout_att": 0.1,
47
+ "dropout_emb": 0.0,
48
+ "dropout_pre_encoder": 0.1,
49
+ "feat_in": 128,
50
+ "feat_out": -1,
51
+ "ff_expansion_factor": 4,
52
+ "n_heads": 8,
53
+ "n_layers": 32,
54
+ "pos_emb_max_len": 5000,
55
+ "reduction": null,
56
+ "reduction_factor": 1,
57
+ "reduction_position": null,
58
+ "self_attention_model": "rel_pos",
59
+ "subsampling": "dw_striding",
60
+ "subsampling_conv_channels": 256,
61
+ "subsampling_factor": 8,
62
+ "untie_biases": true,
63
+ "xscaling": false
64
+ },
65
+ "modality_adapter": {
66
+ "_target_": "nemo.collections.speechlm2.modules.perception.IdentityConnector",
67
+ "d_model": 1024
68
+ },
69
+ "output_dim": 2048,
70
+ "preprocessor": {
71
+ "_target_": "nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor",
72
+ "dither": 1e-05,
73
+ "features": 128,
74
+ "frame_splicing": 1,
75
+ "log": true,
76
+ "n_fft": 512,
77
+ "normalize": "per_feature",
78
+ "pad_to": 0,
79
+ "pad_value": 0.0,
80
+ "sample_rate": 16000,
81
+ "window": "hann",
82
+ "window_size": 0.025,
83
+ "window_stride": 0.01
84
+ },
85
+ "target": "nemo.collections.speechlm2.modules.perception.AudioPerceptionModule"
86
+ },
87
+ "pretrained_asr": "nvidia/canary-1b-flash",
88
+ "pretrained_llm": "Qwen/Qwen3-1.7B",
89
+ "pretrained_weights": true,
90
+ "prevent_freeze_params": [
91
+ "^.+\\.lora_.+$"
92
+ ],
93
+ "prompt_format": "qwen"
94
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4e6807ae3b8cf871db02fd7e8ddd624747e2d5c5c1b4a7a183338d73eb0a199
3
+ size 10238014368
results_hf/open-asr-leaderboarddatasets-test-only-ami-test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_hf/open-asr-leaderboarddatasets-test-only-earnings22-test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_hf/open-asr-leaderboarddatasets-test-only-gigaspeech-test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_hf/open-asr-leaderboarddatasets-test-only-librispeech-test.clean.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_hf/open-asr-leaderboarddatasets-test-only-librispeech-test.other.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_hf/open-asr-leaderboarddatasets-test-only-spgispeech-test.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf2bb7864354c7fb647c61585f403cb349b5bc071cee943b1b8b13ab16f39ab8
3
+ size 13620064
results_hf/open-asr-leaderboarddatasets-test-only-tedlium-test.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
results_hf/open-asr-leaderboarddatasets-test-only-voxpopuli-test.jsonl ADDED
The diff for this file is too large to render. See raw diff