ancv committed on
Commit
5491367
·
verified ·
1 Parent(s): 273b7cf

Delete configuration_spark_tts.py

Browse files
Files changed (1) hide show
  1. configuration_spark_tts.py +0 -87
configuration_spark_tts.py DELETED
@@ -1,87 +0,0 @@
1
- # Copyright (c) 2025 SparkAudio & The HuggingFace Inc. team. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """ SparkTTS model configuration"""
15
-
16
- from transformers.configuration_utils import PretrainedConfig
17
- from transformers.utils import logging
18
-
19
-
20
- logger = logging.get_logger(__name__)
21
-
22
- class SparkTTSConfig(PretrainedConfig):
23
- """
24
- This is the configuration class to store the configuration of a [`SparkTTSModel`].
25
- It is used to instantiate a SparkTTS model according to the specified arguments, defining the model
26
- architecture and sub-component paths.
27
-
28
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
29
- Read the documentation from [`PretrainedConfig`] for more information.
30
-
31
- Args:
32
- llm_model_name_or_path (`str`, *optional*, defaults to `"./LLM"`):
33
- Path to the pretrained LLM model or model identifier from huggingface.co/models.
34
- bicodec_model_name_or_path (`str`, *optional*, defaults to `"./BiCodec"`):
35
- Path to the pretrained BiCodec model directory.
36
- wav2vec2_model_name_or_path (`str`, *optional*, defaults to `"./wav2vec2-large-xlsr-53"`):
37
- Path to the pretrained Wav2Vec2 model directory.
38
- sample_rate (`int`, *optional*, defaults to 16000):
39
- The sampling rate of the audio files.
40
- highpass_cutoff_freq (`int`, *optional*, defaults to 40):
41
- Highpass filter cutoff frequency for audio processing.
42
- latent_hop_length (`int`, *optional*, defaults to 320):
43
- Hop length used in BiCodec processing.
44
- ref_segment_duration (`float`, *optional*, defaults to 6.0):
45
- Duration (in seconds) of the reference audio clip used for speaker embedding.
46
- volume_normalize (`bool`, *optional*, defaults to `True`):
47
- Whether to normalize the volume of audio inputs.
48
- bicodec_config (`dict`, *optional*):
49
- A dictionary containing the configuration for the BiCodec model components (encoder, decoder, etc.).
50
- This is typically loaded from the `BiCodec/config.yaml` originally.
51
- **kwargs
52
- Additional keyword arguments passed along to [`PretrainedConfig`].
53
- """
54
-
55
- model_type = "spark-tts"
56
- processor_class = "SparkTTSProcessor"
57
- config_files = ["config.json"]
58
- attribute_map = {} # Add mappings if needed for renaming attributes
59
-
60
- def __init__(
61
- self,
62
- llm_model_name_or_path="./LLM",
63
- bicodec_model_name_or_path="./BiCodec",
64
- wav2vec2_model_name_or_path="./wav2vec2-large-xlsr-53",
65
- sample_rate=16000,
66
- highpass_cutoff_freq=40,
67
- latent_hop_length=320,
68
- ref_segment_duration=6.0,
69
- volume_normalize=True,
70
- bicodec_config=None,
71
- **kwargs,
72
- ):
73
- self.llm_model_name_or_path = llm_model_name_or_path
74
- self.bicodec_model_name_or_path = bicodec_model_name_or_path
75
- self.wav2vec2_model_name_or_path = wav2vec2_model_name_or_path
76
- self.sample_rate = sample_rate
77
- self.highpass_cutoff_freq = highpass_cutoff_freq
78
- self.latent_hop_length = latent_hop_length
79
- self.ref_segment_duration = ref_segment_duration
80
- self.volume_normalize = volume_normalize
81
- self.bicodec_config = bicodec_config if bicodec_config is not None else {}
82
-
83
- # REMOVE THIS WARNING - the check in SparkTTSModel is better
84
- # if not self.bicodec_config:
85
- # logger.warning("BiCodec config is empty. BiCodec model might not load correctly.")
86
-
87
- super().__init__(**kwargs)