ancv committed on
Commit
94051fb
·
verified ·
1 Parent(s): 5491367

Delete config.json

Browse files
Files changed (1) hide show
  1. config.json +0 -90
config.json DELETED
@@ -1,90 +0,0 @@
1
- {
2
- "model_type": "spark-tts",
3
- "architectures": [
4
- "SparkTTSModel"
5
- ],
6
- "auto_map": {
7
- "AutoConfig": "configuration_spark_tts.SparkTTSConfig",
8
- "AutoModel": "modeling_spark_tts.SparkTTSModel",
9
- "AutoProcessor": "processing_spark_tts.SparkTTSProcessor"
10
- },
11
- "processor_class": "processing_spark_tts.SparkTTSProcessor",
12
- "custom_pipelines": {
13
- "text-to-speech": {
14
- "impl": "pipeline_spark_tts.SparkTTSPipeline",
15
- "pt": ["AutoModel"]
16
- }
17
- },
18
- "llm_model_name_or_path": "./LLM",
19
- "bicodec_model_name_or_path": "./BiCodec",
20
- "wav2vec2_model_name_or_path": "./wav2vec2-large-xlsr-53",
21
- "sample_rate": 16000,
22
- "highpass_cutoff_freq": 40,
23
- "latent_hop_length": 320,
24
- "ref_segment_duration": 6.0,
25
- "volume_normalize": true,
26
- "bicodec_config": {
27
- "audio_tokenizer": {
28
- "mel_params": {
29
- "sample_rate": 16000,
30
- "n_fft": 1024,
31
- "win_length": 640,
32
- "hop_length": 320,
33
- "mel_fmin": 10,
34
- "mel_fmax": null,
35
- "num_mels": 128
36
- },
37
- "encoder": {
38
- "input_channels": 1024,
39
- "vocos_dim": 384,
40
- "vocos_intermediate_dim": 2048,
41
- "vocos_num_layers": 12,
42
- "out_channels": 1024,
43
- "sample_ratios": [1, 1]
44
- },
45
- "decoder": {
46
- "input_channel": 1024,
47
- "channels": 1536,
48
- "rates": [8, 5, 4, 2],
49
- "kernel_sizes": [16, 11, 8, 4]
50
- },
51
- "quantizer": {
52
- "input_dim": 1024,
53
- "codebook_size": 8192,
54
- "codebook_dim": 8,
55
- "commitment": 0.25,
56
- "codebook_loss_weight": 2.0,
57
- "use_l2_normlize": true,
58
- "threshold_ema_dead_code": 0.2
59
- },
60
- "speaker_encoder": {
61
- "input_dim": 128,
62
- "out_dim": 1024,
63
- "latent_dim": 128,
64
- "token_num": 32,
65
- "fsq_levels": [4, 4, 4, 4, 4, 4],
66
- "fsq_num_quantizers": 1
67
- },
68
- "prenet": {
69
- "input_channels": 1024,
70
- "vocos_dim": 384,
71
- "vocos_intermediate_dim": 2048,
72
- "vocos_num_layers": 12,
73
- "out_channels": 1024,
74
- "condition_dim": 1024,
75
- "sample_ratios": [1, 1],
76
- "use_tanh_at_final": false
77
- },
78
- "postnet": {
79
- "input_channels": 1024,
80
- "vocos_dim": 384,
81
- "vocos_intermediate_dim": 2048,
82
- "vocos_num_layers": 6,
83
- "out_channels": 1024,
84
- "use_tanh_at_final": false
85
- }
86
- }
87
- },
88
- "torch_dtype": "bfloat16",
89
- "transformers_version": "4.43.1"
90
- }