RuntimeRacer
committed on
Commit
·
a4e7e04
1
Parent(s):
ca75cff
initial weights commit
Browse files
- encoder/config.json +23 -0
- encoder/encoder.pt +3 -0
- synthesizer/config.json +37 -0
- synthesizer/synthesizer.pt +3 -0
- vocoder/config.json +28 -0
- vocoder/vocoder.pt +3 -0
encoder/config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_version_": "v1",
|
3 |
+
"data": {
|
4 |
+
"sampling_rate": 16000,
|
5 |
+
"filter_length": 1024,
|
6 |
+
"mel_window_step": 10,
|
7 |
+
"mel_window_length": 25,
|
8 |
+
"mel_n_channels": 40,
|
9 |
+
"normalize": true,
|
10 |
+
"trim_silence": true,
|
11 |
+
"rescale": true,
|
12 |
+
"rescaling_max": 0.9,
|
13 |
+
"vad_window_length": 30,
|
14 |
+
"vad_moving_average_width": 8,
|
15 |
+
"vad_max_silence_length": 6
|
16 |
+
},
|
17 |
+
"model": {
|
18 |
+
"mel_n_channels": 40,
|
19 |
+
"model_hidden_size": 768,
|
20 |
+
"model_num_layers": 3,
|
21 |
+
"model_embedding_size": 768
|
22 |
+
}
|
23 |
+
}
|
encoder/encoder.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f79e24303ada15f4859d9661ba8cc96f37e9bff27ce8bc45281590634bf41ed0
|
3 |
+
size 150352630
|
synthesizer/config.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_version_": "v1",
|
3 |
+
"data": {
|
4 |
+
"sampling_rate": 16000,
|
5 |
+
"filter_length": 1024,
|
6 |
+
"hop_length": 256,
|
7 |
+
"win_length": 1024
|
8 |
+
},
|
9 |
+
"model": {
|
10 |
+
"embed_dims": 256,
|
11 |
+
"series_embed_dims": 64,
|
12 |
+
"num_chars": 66,
|
13 |
+
"durpred_conv_dims": 256,
|
14 |
+
"durpred_rnn_dims": 64,
|
15 |
+
"durpred_dropout": 0.5,
|
16 |
+
"pitch_conv_dims": 256,
|
17 |
+
"pitch_rnn_dims": 128,
|
18 |
+
"pitch_dropout": 0.5,
|
19 |
+
"pitch_strength": 1.0,
|
20 |
+
"energy_conv_dims": 256,
|
21 |
+
"energy_rnn_dims": 64,
|
22 |
+
"energy_dropout": 0.5,
|
23 |
+
"energy_strength": 1.0,
|
24 |
+
"rnn_dims": 512,
|
25 |
+
"prenet_dims": 256,
|
26 |
+
"prenet_k": 16,
|
27 |
+
"postnet_num_highways": 4,
|
28 |
+
"prenet_dropout": 0.5,
|
29 |
+
"postnet_dims": 256,
|
30 |
+
"postnet_k": 8,
|
31 |
+
"prenet_num_highways": 4,
|
32 |
+
"postnet_dropout": 0.5,
|
33 |
+
"n_mels": 80,
|
34 |
+
"speaker_embed_dims": 768,
|
35 |
+
"padding_value": -11.5129
|
36 |
+
}
|
37 |
+
}
|
synthesizer/synthesizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbee17973561ddeaed1def00ee647b56c40939e2673f27792087476fd332d3ff
|
3 |
+
size 367179977
|
vocoder/config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_version_": "v1",
|
3 |
+
"data": {
|
4 |
+
"sampling_rate": 16000,
|
5 |
+
"filter_length": 1024,
|
6 |
+
"hop_length": 256,
|
7 |
+
"win_length": 1024
|
8 |
+
},
|
9 |
+
"model": {
|
10 |
+
"in_channels": 80,
|
11 |
+
"out_channels": 4,
|
12 |
+
"kernel_size": 7,
|
13 |
+
"channels": 384,
|
14 |
+
"bias": true,
|
15 |
+
"upsample_scales": [5, 5, 2],
|
16 |
+
"stack_kernel_size": 3,
|
17 |
+
"stacks": 4,
|
18 |
+
"nonlinear_activation": "LeakyReLU",
|
19 |
+
"nonlinear_activation_params": {
|
20 |
+
"negative_slope": 0.2
|
21 |
+
},
|
22 |
+
"pad": "ReflectionPad1d",
|
23 |
+
"pad_params": {},
|
24 |
+
"use_final_nonlinear_activation": true,
|
25 |
+
"use_weight_norm": true,
|
26 |
+
"use_causal_conv": false
|
27 |
+
}
|
28 |
+
}
|
vocoder/vocoder.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8fc3c558778d4e509200a2fa29f2e0a67314ca6253a7c9833b79502fbc49ad6
|
3 |
+
size 104043001
|