Update configuration_helpingai.py
configuration_helpingai.py  +4 -62
configuration_helpingai.py
CHANGED
@@ -1,61 +1,3 @@
-from transformers.configuration_utils import PretrainedConfig
-
-
-class HelpingAIConfig(PretrainedConfig):
-    model_type = "helpingai"
-
-    def __init__(
-        self,
-        vocab_size=50257,
-        hidden_size=768,
-        num_hidden_layers=12,
-        num_attention_heads=12,
-        intermediate_size=3072,
-        max_position_embeddings=2048,
-        layer_norm_epsilon=1e-5,
-        hidden_act="gelu",
-        dropout=0.0,
-        attention_dropout=0.0,
-        tie_word_embeddings=True,
-        # Structured output head
-        use_structured_output=True,
-        structured_output_vocab_size=2,
-        # Speech head
-        use_speech_output=False,
-        speech_num_mels=80,
-        speech_head_hidden_dim=1024,
-        speech_upsample_factor=1,
-        speech_loss_type="l1",
-        # Misc
-        initializer_range=0.02,
-        **kwargs,
-    ):
-        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
-        self.vocab_size = vocab_size
-        self.hidden_size = hidden_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.intermediate_size = intermediate_size
-        self.max_position_embeddings = max_position_embeddings
-        self.layer_norm_epsilon = layer_norm_epsilon
-        self.hidden_act = hidden_act
-        self.dropout = dropout
-        self.attention_dropout = attention_dropout
-        self.initializer_range = initializer_range
-
-        # Structured
-        self.use_structured_output = use_structured_output
-        self.structured_output_vocab_size = structured_output_vocab_size
-
-        # Speech
-        self.use_speech_output = use_speech_output
-        self.speech_num_mels = speech_num_mels
-        self.speech_head_hidden_dim = speech_head_hidden_dim
-        self.speech_upsample_factor = speech_upsample_factor
-        self.speech_loss_type = speech_loss_type
-
-"""HelpingAI model configuration"""
-
 from transformers.configuration_utils import PretrainedConfig, layer_type_validation
 from transformers.modeling_rope_utils import rope_config_validation
 from transformers.utils import logging
@@ -264,10 +206,10 @@ class HelpingAIConfig(PretrainedConfig):
         structured_output_vocab_size=100,
         empathy_scaling_factor=1.2,
         reasoning_temperature=0.8,
-
-
-
-
+        # Structured head architecture (new)
+        structured_head_type: str = "linear",  # one of: linear, mlp_v1
+        structured_head_hidden_dim: int | None = None,
+        structured_head_activation: str = "gelu",  # gelu or relu
         # Speech output head options
         use_speech_output=False,
         speech_num_mels=80,
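
The four added lines make the structured-output head configurable: a plain linear projection ("linear", the default) or a small MLP ("mlp_v1") with an optional hidden width and a choice of GELU or ReLU activation. Below is a minimal sketch of how modeling code might consume these fields; the helper name build_structured_head and its exact layer layout are illustrative assumptions, not code from this repository, and it assumes the config also exposes hidden_size alongside the structured_output_vocab_size shown in the signature above.

# Illustrative sketch only -- not the modeling code shipped with this repo.
import torch.nn as nn

# Assumes this script sits next to configuration_helpingai.py in the model repo.
from configuration_helpingai import HelpingAIConfig

def build_structured_head(config):
    # Project from the model width to the structured-output vocabulary.
    in_dim = config.hidden_size
    out_dim = config.structured_output_vocab_size

    if config.structured_head_type == "linear":
        # Default: a single projection.
        return nn.Linear(in_dim, out_dim)

    if config.structured_head_type == "mlp_v1":
        # Two-layer MLP; fall back to the model width if no hidden dim is set.
        hidden_dim = config.structured_head_hidden_dim or in_dim
        act = nn.GELU() if config.structured_head_activation == "gelu" else nn.ReLU()
        return nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            act,
            nn.Linear(hidden_dim, out_dim),
        )

    raise ValueError(f"unknown structured_head_type: {config.structured_head_type!r}")

# Example: requesting the MLP variant (all other constructor arguments keep their defaults).
config = HelpingAIConfig(
    structured_head_type="mlp_v1",
    structured_head_hidden_dim=512,
    structured_head_activation="gelu",
)
head = build_structured_head(config)

Leaving structured_head_type at its default of "linear" presumably preserves the single-projection head that existed before this change, so configs and checkpoints created earlier should keep loading with the same head shape.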