Upload folder using huggingface_hub
- LICENSE +1 -0
- README.md +1 -0
- amplify_te.py +30 -13
- config.json +2 -2
LICENSE CHANGED
@@ -5,6 +5,7 @@
 MIT License

 Copyright (c) 2024 chandar-lab
+Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
README.md CHANGED
@@ -1,4 +1,5 @@
 ---
+library_name: transformers
 license: mit
 datasets:
 - chandar-lab/UR100P
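The added library_name: transformers metadata points the Hub at the transformers loading path for this checkpoint. A minimal loading sketch, not part of this commit: the repo id below is a placeholder, and trust_remote_code=True is assumed to be needed because the modeling code (amplify_te.py) ships inside the repository.

    from transformers import AutoModel, AutoTokenizer

    repo_id = "some-org/AMPLIFY_TE"  # placeholder repo id, not taken from this commit
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)  # assumed to resolve to the custom AMPLIFY classes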
amplify_te.py CHANGED
@@ -1,6 +1,27 @@
+# noqa: license-check
 # SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: MIT
+# Copyright (c) 2024 chandar-lab
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
 #
 # Adapted from https://huggingface.co/chandar-lab/AMPLIFY_120M/blob/main/amplify.py

@@ -126,17 +147,15 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             config.padded_vocab_size,
             config.hidden_size,
             padding_idx=config.pad_token_id,
-            dtype=config.torch_dtype,
+            dtype=config.dtype,
         )

         if config.layer_norm_after_embedding:
             self.layer_norm_1 = (
-                transformer_engine.pytorch.RMSNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
-                )
+                transformer_engine.pytorch.RMSNorm(config.hidden_size, config.norm_eps, params_dtype=config.dtype)
                 if config.rms_norm
                 else transformer_engine.pytorch.LayerNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
+                    config.hidden_size, config.norm_eps, params_dtype=config.dtype
                 )
             )

@@ -148,6 +167,9 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             intermediate_size = int(2 * config.intermediate_size / 3)
             intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)

+        else:
+            intermediate_size = config.intermediate_size
+
         self.transformer_encoder = nn.ModuleList()
         for layer_num in range(config.num_hidden_layers):
             self.transformer_encoder.append(
@@ -173,7 +195,7 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
                     window_size=(-1, -1),
                     rotary_pos_interleaved=True,
                     seq_length=config.max_length,
-                    params_dtype=config.torch_dtype,
+                    params_dtype=config.dtype,
                 )
             )

@@ -191,7 +213,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> BaseModelOutput:
         """Forward pass of the AMPLIFY model.

@@ -201,7 +222,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.

         Returns:
             BaseModelOutput: The output of the model.
@@ -256,7 +276,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
                 config.hidden_size,
                 config.padded_vocab_size,
                 config.norm_eps,
-                params_dtype=config.torch_dtype,
+                params_dtype=config.dtype,
                 normalization="RMSNorm" if config.rms_norm else "LayerNorm",
                 init_method=lambda x: torch.nn.init.uniform_(
                     x, -self.config.decoder_init_range, self.config.decoder_init_range
@@ -265,7 +285,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):

         else:
             self.decoder = transformer_engine.pytorch.Linear(
-                config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
+                config.hidden_size, config.vocab_size, params_dtype=config.dtype
             )

     def forward(
@@ -275,7 +295,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> MaskedLMOutput:
         """Forward pass of the AMPLIFYForMaskedLM model.

@@ -285,7 +304,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.

         Returns:
             MaskedLMOutput: The output of the model.
@@ -296,7 +314,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
             output_hidden_states,
             output_attentions,
             labels,
-            **kwargs,
         )

         # Classification head with layer norm
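Besides renaming config.torch_dtype to config.dtype throughout, this file gains an else branch so that intermediate_size falls back to config.intermediate_size when the gated-FFN rescaling above it does not apply. A standalone sketch of that sizing rule follows; the gated_ffn flag and the multiple_of default are illustrative assumptions, not values read from this file.

    def ffn_intermediate_size(intermediate_size: int, gated_ffn: bool, multiple_of: int = 256) -> int:
        if gated_ffn:
            # Gated (SwiGLU-style) FFNs use 2/3 of the configured width to keep the
            # parameter count comparable, rounded up to a hardware-friendly multiple.
            size = int(2 * intermediate_size / 3)
            return multiple_of * ((size + multiple_of - 1) // multiple_of)
        # Fallback added in this diff: keep the configured width unchanged.
        return intermediate_size

    print(ffn_intermediate_size(2560, gated_ffn=True))   # 1792
    print(ffn_intermediate_size(2560, gated_ffn=False))  # 2560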
config.json CHANGED
@@ -12,6 +12,7 @@
     "bos_token_id": 3,
     "decoder_init_range": 0.02,
     "dropout_prob": 0,
+    "dtype": "float32",
     "embedding_init_range": 0.02,
     "eos_token_id": 4,
     "ffn_bias": false,
@@ -31,8 +32,7 @@
     "padded_vocab_size": 32,
     "pre_activation_layer_norm": true,
     "rms_norm": true,
-    "torch_dtype": "float32",
-    "transformers_version": "4.53.2",
+    "transformers_version": "4.56.1",
     "unk_token_id": 1,
     "vocab_path": "conf/tokenizer/amplify_vocab.txt",
     "vocab_size": 27
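The config change mirrors the code: a "dtype" entry is added, the old "torch_dtype" entry is dropped, and the recorded transformers_version moves from 4.53.2 to 4.56.1. A quick sanity check after loading, reusing the placeholder repo id from the README sketch above:

    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("some-org/AMPLIFY_TE", trust_remote_code=True)  # placeholder repo id
    print(config.dtype)  # expected to resolve to float32, per this config.json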