Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

.gitattributes +1 -0
README.md +11 -3
config.json +94 -0
model.safetensors +3 -0
results_hf/open-asr-leaderboarddatasets-test-only-ami-test.jsonl +0 -0
results_hf/open-asr-leaderboarddatasets-test-only-earnings22-test.jsonl +0 -0
results_hf/open-asr-leaderboarddatasets-test-only-gigaspeech-test.jsonl +0 -0
results_hf/open-asr-leaderboarddatasets-test-only-librispeech-test.clean.jsonl +0 -0
results_hf/open-asr-leaderboarddatasets-test-only-librispeech-test.other.jsonl +0 -0
results_hf/open-asr-leaderboarddatasets-test-only-spgispeech-test.jsonl +3 -0
results_hf/open-asr-leaderboarddatasets-test-only-tedlium-test.jsonl +0 -0
results_hf/open-asr-leaderboarddatasets-test-only-voxpopuli-test.jsonl +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+results_hf/open-asr-leaderboarddatasets-test-only-spgispeech-test.jsonl filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,11 @@
----
-license: cc-by-4.0
----

+---
+library_name: NeMo
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: https://github.com/NVIDIA/NeMo
+- Paper: [More Information Needed]
+- Docs: https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit

config.json ADDED Viewed

	@@ -0,0 +1,94 @@

+{
+  "audio_locator_tag": "<|audioplaceholder|>",
+  "freeze_params": [
+    "^llm\\..+$",
+    "^embed_tokens\\..+$"
+  ],
+  "lora": {
+    "lora_alpha": 256,
+    "lora_dropout": 0.01,
+    "r": 128,
+    "target_modules": [
+      "q_proj",
+      "v_proj"
+    ],
+    "task_type": "CAUSAL_LM"
+  },
+  "lr_scheduler": {
+    "_target_": "nemo.core.optim.lr_scheduler.CosineAnnealing",
+    "max_steps": 100000,
+    "min_lr": 1e-06,
+    "warmup_steps": 1000
+  },
+  "optimizer": {
+    "_target_": "torch.optim.AdamW",
+    "betas": [
+      0.9,
+      0.98
+    ],
+    "foreach": true,
+    "lr": 0.0005,
+    "weight_decay": 0.001
+  },
+  "perception": {
+    "encoder": {
+      "_target_": "nemo.collections.asr.modules.ConformerEncoder",
+      "att_context_size": [
+        -1,
+        -1
+      ],
+      "causal_downsampling": false,
+      "conv_context_size": null,
+      "conv_kernel_size": 9,
+      "conv_norm_type": "batch_norm",
+      "d_model": 1024,
+      "dropout": 0.1,
+      "dropout_att": 0.1,
+      "dropout_emb": 0.0,
+      "dropout_pre_encoder": 0.1,
+      "feat_in": 128,
+      "feat_out": -1,
+      "ff_expansion_factor": 4,
+      "n_heads": 8,
+      "n_layers": 32,
+      "pos_emb_max_len": 5000,
+      "reduction": null,
+      "reduction_factor": 1,
+      "reduction_position": null,
+      "self_attention_model": "rel_pos",
+      "subsampling": "dw_striding",
+      "subsampling_conv_channels": 256,
+      "subsampling_factor": 8,
+      "untie_biases": true,
+      "xscaling": false
+    },
+    "modality_adapter": {
+      "_target_": "nemo.collections.speechlm2.modules.perception.IdentityConnector",
+      "d_model": 1024
+    },
+    "output_dim": 2048,
+    "preprocessor": {
+      "_target_": "nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor",
+      "dither": 1e-05,
+      "features": 128,
+      "frame_splicing": 1,
+      "log": true,
+      "n_fft": 512,
+      "normalize": "per_feature",
+      "pad_to": 0,
+      "pad_value": 0.0,
+      "sample_rate": 16000,
+      "window": "hann",
+      "window_size": 0.025,
+      "window_stride": 0.01
+    },
+    "target": "nemo.collections.speechlm2.modules.perception.AudioPerceptionModule"
+  },
+  "pretrained_asr": "nvidia/canary-1b-flash",
+  "pretrained_llm": "Qwen/Qwen3-1.7B",
+  "pretrained_weights": true,
+  "prevent_freeze_params": [
+    "^.+\\.lora_.+$"
+  ],
+  "prompt_format": "qwen"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4e6807ae3b8cf871db02fd7e8ddd624747e2d5c5c1b4a7a183338d73eb0a199
+size 10238014368

results_hf/open-asr-leaderboarddatasets-test-only-ami-test.jsonl ADDED Viewed