eyad-silx committed on
Commit a44b70f · verified · 1 Parent(s): 9d2f2d0

Update configuration_quasrav4.py

Files changed (1)
  1. configuration_quasrav4.py +1 -9
configuration_quasrav4.py CHANGED
@@ -8,7 +8,6 @@ class InfinityFormerConfig(PretrainedConfig):
     model_type = "infinity_former"
 
     def __init__(self, **kwargs):
-        # Pop custom arguments from kwargs, using defaults from your config.json
         self.vocab_size = kwargs.pop("vocab_size", 151669)
         self.hidden_size = kwargs.pop("hidden_size", 768)
         self.num_hidden_layers = kwargs.pop("num_hidden_layers", 54)
@@ -16,7 +15,7 @@ class InfinityFormerConfig(PretrainedConfig):
         self.intermediate_size = kwargs.pop("intermediate_size", 3072)
         self.hidden_dropout_prob = kwargs.pop("hidden_dropout_prob", 0.1)
         self.attention_probs_dropout_prob = kwargs.pop("attention_probs_dropout_prob", 0.1)
-        self.max_position_embeddings = kwargs.pop("max_position_embeddings", 812)
+        self.max_position_embeddings = kwargs.pop("max_position_embeddings", 8192)
         self.initializer_range = kwargs.pop("initializer_range", 0.02)
         self.layer_norm_eps = kwargs.pop("layer_norm_eps", 1e-5)
         self.use_rotary_embeddings = kwargs.pop("use_rotary_embeddings", True)
@@ -32,17 +31,10 @@ class InfinityFormerConfig(PretrainedConfig):
         self.use_memory_attention = kwargs.pop("use_memory_attention", False)
         self.use_gradient_checkpointing = kwargs.pop("use_gradient_checkpointing", False)
 
-        # The `use_return_dict` is a read-only property that depends on `return_dict`.
-        # We must pop it from kwargs before calling super().__init__ to avoid an error.
         use_return_dict = kwargs.pop("use_return_dict", True)
-
-        # Pass the rest of the arguments to the parent class.
         super().__init__(**kwargs)
-
-        # Now, set the underlying attribute that the `use_return_dict` property uses.
         self.return_dict = use_return_dict
 
-        # Validation logic
         if self.hidden_size % self.num_attention_heads != 0:
             raise ValueError(
                 f"`hidden_size` ({self.hidden_size}) must be a multiple of `num_attention_heads` "