pstjohn committed on
Commit 98100a7 · verified · 1 Parent(s): a9292a7

Upload folder using huggingface_hub

Files changed (4):
1. LICENSE +1 -0
2. README.md +1 -0
3. amplify_te.py +30 -13
4. config.json +2 -2
LICENSE CHANGED
@@ -5,6 +5,7 @@
 MIT License
 
 Copyright (c) 2024 chandar-lab
+Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
README.md CHANGED
@@ -1,4 +1,5 @@
 ---
+library_name: transformers
 license: mit
 datasets:
 - chandar-lab/UR100P
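The new library_name tag tells the Hub (and downstream tooling) to treat this checkpoint as a transformers model. A minimal loading sketch under stated assumptions: the repo id below is hypothetical, and because the modeling code ships in amplify_te.py, auto-class loading would need trust_remote_code=True.

from transformers import AutoModel, AutoTokenizer

repo_id = "nvidia/AMPLIFY_120M"  # hypothetical repo id; substitute the actual one
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)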
amplify_te.py CHANGED
@@ -1,6 +1,27 @@
+# noqa: license-check
 # SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: MIT
+# Copyright (c) 2024 chandar-lab
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
 #
 # Adapted from https://huggingface.co/chandar-lab/AMPLIFY_120M/blob/main/amplify.py
 
@@ -126,17 +147,15 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             config.padded_vocab_size,
             config.hidden_size,
             padding_idx=config.pad_token_id,
-            dtype=config.torch_dtype,
+            dtype=config.dtype,
         )
 
         if config.layer_norm_after_embedding:
             self.layer_norm_1 = (
-                transformer_engine.pytorch.RMSNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
-                )
+                transformer_engine.pytorch.RMSNorm(config.hidden_size, config.norm_eps, params_dtype=config.dtype)
                 if config.rms_norm
                 else transformer_engine.pytorch.LayerNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
+                    config.hidden_size, config.norm_eps, params_dtype=config.dtype
                 )
             )
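This hunk, like the params_dtype changes below, tracks the rename of config.torch_dtype to config.dtype in recent transformers releases (this commit also bumps transformers_version to 4.56.1 in config.json). A hedged compatibility sketch for code that must accept configs written under either name; resolve_dtype is a hypothetical helper, not part of this repo:

import torch

def resolve_dtype(config):
    # Prefer the new attribute, fall back to the legacy one.
    dtype = getattr(config, "dtype", None) or getattr(config, "torch_dtype", None)
    if isinstance(dtype, str):  # configs serialize dtypes as strings, e.g. "float32"
        dtype = getattr(torch, dtype)
    return dtype or torch.float32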
 
@@ -148,6 +167,9 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             intermediate_size = int(2 * config.intermediate_size / 3)
             intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)
 
+        else:
+            intermediate_size = config.intermediate_size
+
         self.transformer_encoder = nn.ModuleList()
         for layer_num in range(config.num_hidden_layers):
             self.transformer_encoder.append(
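The added else branch appears to close a gap: intermediate_size was previously assigned only inside the SwiGLU sizing path (the context lines above), so a config with a non-gated activation would have hit a NameError; it now falls back to config.intermediate_size unchanged. A worked example of the SwiGLU arithmetic, assuming multiple_of = 8 as in the upstream amplify.py (it is defined just above this hunk):

multiple_of = 8
config_intermediate_size = 2048  # hypothetical config value

intermediate_size = int(2 * config_intermediate_size / 3)  # 1365: scale down by 2/3
intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)  # 1368: round up to a multiple of 8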
@@ -173,7 +195,7 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
                     window_size=(-1, -1),
                     rotary_pos_interleaved=True,
                     seq_length=config.max_length,
-                    params_dtype=config.torch_dtype,
+                    params_dtype=config.dtype,
                 )
             )
 
@@ -191,7 +213,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> BaseModelOutput:
         """Forward pass of the AMPLIFY model.
 
@@ -201,7 +222,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.
 
         Returns:
             BaseModelOutput: The output of the model.
@@ -256,7 +276,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
                 config.hidden_size,
                 config.padded_vocab_size,
                 config.norm_eps,
-                params_dtype=config.torch_dtype,
+                params_dtype=config.dtype,
                 normalization="RMSNorm" if config.rms_norm else "LayerNorm",
                 init_method=lambda x: torch.nn.init.uniform_(
                     x, -self.config.decoder_init_range, self.config.decoder_init_range
@@ -265,7 +285,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
 
         else:
             self.decoder = transformer_engine.pytorch.Linear(
-                config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
+                config.hidden_size, config.vocab_size, params_dtype=config.dtype
             )
 
     def forward(
@@ -275,7 +295,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> MaskedLMOutput:
         """Forward pass of the AMPLIFYForMaskedLM model.
 
@@ -285,7 +304,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.
 
         Returns:
             MaskedLMOutput: The output of the model.
@@ -296,7 +314,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
             output_hidden_states,
             output_attentions,
             labels,
-            **kwargs,
         )
 
         # Classification head with layer norm
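The four **kwargs removals in this file (both forward signatures and their docstrings) make the call contracts explicit. A minimal illustration of the behavioral difference, using hypothetical stand-in functions rather than the real modules:

def forward_with_kwargs(input_ids, labels=None, **kwargs):
    return input_ids  # unexpected arguments are silently swallowed

def forward_explicit(input_ids, labels=None):
    return input_ids  # unexpected arguments now fail loudly

forward_with_kwargs([1, 2, 3], use_cache=True)  # runs; a typo'd kwarg goes unnoticed
try:
    forward_explicit([1, 2, 3], use_cache=True)
except TypeError as err:
    print(err)  # ...got an unexpected keyword argument 'use_cache'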
 
config.json CHANGED
@@ -12,6 +12,7 @@
   "bos_token_id": 3,
   "decoder_init_range": 0.02,
   "dropout_prob": 0,
+  "dtype": "float32",
   "embedding_init_range": 0.02,
   "eos_token_id": 4,
   "ffn_bias": false,
@@ -31,8 +32,7 @@
   "padded_vocab_size": 32,
   "pre_activation_layer_norm": true,
   "rms_norm": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.53.2",
+  "transformers_version": "4.56.1",
   "unk_token_id": 1,
   "vocab_path": "conf/tokenizer/amplify_vocab.txt",
   "vocab_size": 27