badaoui (HF Staff) committed
Commit fe41f38 · verified · 1 Parent(s): ab398ca

Add tiny random SeamlessM4T configuration for testing

Files changed (3)
  1. README.md +13 -0
  2. config.json +114 -0
  3. pytorch_model.bin +3 -0
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ library_name: transformers
+ tags:
+ - testing
+ - tiny-model
+ - seamless-m4t
+ - config-only
+ ---
+
+ # Tiny Random SeamlessM4T Model Configuration
+
+ This is a tiny configuration for facebook/hf-seamless-m4t-medium with reduced dimensions, designed for testing purposes.
+
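For quick reference, a config-only checkpoint like this is typically consumed by loading the configuration and instantiating a randomly initialized model from it. A minimal sketch, assuming `transformers` is installed; the repo id below is a placeholder, not necessarily this repository's actual name:

```python
# Minimal sketch: build a randomly initialized SeamlessM4TModel from the
# tiny config for fast unit tests. The repo id is a placeholder.
from transformers import SeamlessM4TConfig, SeamlessM4TModel

config = SeamlessM4TConfig.from_pretrained("badaoui/tiny-random-seamless-m4t")
model = SeamlessM4TModel(config)  # fresh random weights; no large download
assert model.config.hidden_size == 64
```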
config.json ADDED
@@ -0,0 +1,114 @@
+ {
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "adaptor_dropout": 0.1,
+   "adaptor_kernel_size": 8,
+   "adaptor_stride": 8,
+   "add_adapter": true,
+   "architectures": [
+     "SeamlessM4TModel"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 2,
+   "conv_depthwise_kernel_size": 31,
+   "decoder_attention_heads": 2,
+   "decoder_ffn_dim": 128,
+   "decoder_layerdrop": 0.05,
+   "decoder_layers": 2,
+   "decoder_start_token_id": 3,
+   "dropout": 0.1,
+   "encoder_attention_heads": 2,
+   "encoder_ffn_dim": 128,
+   "encoder_layerdrop": 0.05,
+   "encoder_layers": 2,
+   "eos_token_id": 3,
+   "feature_projection_input_dim": 160,
+   "hidden_size": 64,
+   "initializer_range": 0.02,
+   "is_encoder_decoder": true,
+   "lang_embed_dim": 256,
+   "layer_norm_eps": 1e-05,
+   "leaky_relu_slope": 0.1,
+   "max_new_tokens": 256,
+   "max_position_embeddings": 512,
+   "max_source_positions": 4096,
+   "model_type": "seamless_m4t",
+   "num_adapter_layers": 1,
+   "num_attention_heads": 16,
+   "num_conv_pos_embedding_groups": 16,
+   "num_conv_pos_embeddings": 128,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embeddings_type": "relative",
+   "resblock_dilation_sizes": [
+     [
+       1,
+       3,
+       5
+     ],
+     [
+       1,
+       3,
+       5
+     ],
+     [
+       1,
+       3,
+       5
+     ]
+   ],
+   "resblock_kernel_sizes": [
+     3,
+     7,
+     11
+   ],
+   "rotary_embedding_base": 10000,
+   "sampling_rate": 16000,
+   "scale_embedding": true,
+   "speech_encoder_attention_heads": 16,
+   "speech_encoder_dropout": 0.0,
+   "speech_encoder_hidden_act": "swish",
+   "speech_encoder_intermediate_size": 4096,
+   "speech_encoder_layerdrop": 0.1,
+   "speech_encoder_layers": 12,
+   "spkr_embed_dim": 256,
+   "t2u_bos_token_id": 0,
+   "t2u_decoder_attention_heads": 16,
+   "t2u_decoder_ffn_dim": 8192,
+   "t2u_decoder_layers": 4,
+   "t2u_decoder_start_token_id": 2,
+   "t2u_encoder_attention_heads": 16,
+   "t2u_encoder_ffn_dim": 8192,
+   "t2u_encoder_layers": 4,
+   "t2u_eos_token_id": 2,
+   "t2u_max_new_tokens": 1024,
+   "t2u_max_position_embeddings": 2048,
+   "t2u_pad_token_id": 1,
+   "t2u_vocab_size": 10082,
+   "torch_dtype": "float32",
+   "transformers_version": "4.53.1",
+   "unit_embed_dim": 1280,
+   "unit_hifi_gan_vocab_size": 10000,
+   "upsample_initial_channel": 512,
+   "upsample_kernel_sizes": [
+     11,
+     8,
+     8,
+     4,
+     4
+   ],
+   "upsample_rates": [
+     5,
+     4,
+     4,
+     2,
+     2
+   ],
+   "use_cache": true,
+   "var_pred_dropout": 0.5,
+   "variance_predictor_kernel_size": 3,
+   "vocab_size": 256206,
+   "vocoder_num_langs": 36,
+   "vocoder_num_spkrs": 200,
+   "vocoder_offset": 4
+ }
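The config keeps the medium checkpoint's overall structure but shrinks the text encoder/decoder (hidden_size 64, 2 layers, 2 attention heads, FFN dim 128), while the speech-encoder and t2u fields stay closer to their original sizes. A minimal sketch of building an equivalent tiny config directly in code, assuming only the reduced text-model dimensions matter for the test; unlisted fields fall back to `transformers` defaults, so the result is not byte-identical to the config.json above:

```python
# Minimal sketch: recreate the reduced text-model dimensions in code,
# without hub access. Unlisted fields keep transformers' defaults.
from transformers import SeamlessM4TConfig, SeamlessM4TModel

tiny = SeamlessM4TConfig(
    hidden_size=64,
    encoder_layers=2,
    decoder_layers=2,
    encoder_attention_heads=2,
    decoder_attention_heads=2,
    encoder_ffn_dim=128,
    decoder_ffn_dim=128,
)
model = SeamlessM4TModel(tiny)
print(sum(p.numel() for p in model.parameters()))  # dominated by the ~256k-token embedding
```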
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5217dbac2088bb5bd242dc3926de20473be6f857826523b4a3fb90dfa8a26e34
+ size 1619
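The weights file is stored through Git LFS, so the commit adds only the three-line pointer above: the spec version, the blob's SHA-256 (`oid`), and its size in bytes. The 1619-byte size indicates a tiny placeholder rather than full model weights, consistent with the `config-only` tag. Since the LFS oid is the SHA-256 of the file contents, a downloaded copy can be verified with a short sketch like this:

```python
# Sketch: verify a downloaded pytorch_model.bin against the oid recorded
# in the LFS pointer (the oid is the SHA-256 of the file contents).
import hashlib

expected = "5217dbac2088bb5bd242dc3926de20473be6f857826523b4a3fb90dfa8a26e34"
with open("pytorch_model.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == expected, f"hash mismatch: {digest}"
```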