Upload folder using huggingface_hub
- LICENSE +1 -0
- README.md +1 -0
- amplify_te.py +30 -13
- config.json +2 -2
LICENSE CHANGED
@@ -5,6 +5,7 @@
 MIT License

 Copyright (c) 2024 chandar-lab
+Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
README.md CHANGED
@@ -1,4 +1,5 @@
 ---
+library_name: transformers
 license: mit
 datasets:
 - chandar-lab/UR100P
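The added library_name: transformers metadata points the Hub at the transformers loading path for this checkpoint. A minimal loading sketch, not part of this commit: the repo id below is a placeholder, and trust_remote_code=True is assumed to be needed because the modeling code (amplify_te.py) ships inside the repository.

    from transformers import AutoModel, AutoTokenizer

    repo_id = "some-org/AMPLIFY_TE"  # placeholder repo id, not taken from this commit
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)  # assumed to resolve to the custom AMPLIFY classes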
amplify_te.py CHANGED
@@ -1,6 +1,27 @@
+# noqa: license-check
 # SPDX-FileCopyrightText: Copyright (c) 2024 chandar-lab
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: MIT
+# Copyright (c) 2024 chandar-lab
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
 #
 # Adapted from https://huggingface.co/chandar-lab/AMPLIFY_120M/blob/main/amplify.py

@@ -126,17 +147,15 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             config.padded_vocab_size,
             config.hidden_size,
             padding_idx=config.pad_token_id,
-            dtype=config.torch_dtype,
+            dtype=config.dtype,
         )

         if config.layer_norm_after_embedding:
             self.layer_norm_1 = (
-                transformer_engine.pytorch.RMSNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
-                )
+                transformer_engine.pytorch.RMSNorm(config.hidden_size, config.norm_eps, params_dtype=config.dtype)
                 if config.rms_norm
                 else transformer_engine.pytorch.LayerNorm(
-                    config.hidden_size, config.norm_eps, params_dtype=config.torch_dtype
+                    config.hidden_size, config.norm_eps, params_dtype=config.dtype
                 )
             )

@@ -148,6 +167,9 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             intermediate_size = int(2 * config.intermediate_size / 3)
             intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1) // multiple_of)

+        else:
+            intermediate_size = config.intermediate_size
+
         self.transformer_encoder = nn.ModuleList()
         for layer_num in range(config.num_hidden_layers):
             self.transformer_encoder.append(
@@ -173,7 +195,7 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
                     window_size=(-1, -1),
                     rotary_pos_interleaved=True,
                     seq_length=config.max_length,
-                    params_dtype=config.torch_dtype,
+                    params_dtype=config.dtype,
                 )
             )

@@ -191,7 +213,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> BaseModelOutput:
         """Forward pass of the AMPLIFY model.

@@ -201,7 +222,6 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.

         Returns:
             BaseModelOutput: The output of the model.
@@ -256,7 +276,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
                 config.hidden_size,
                 config.padded_vocab_size,
                 config.norm_eps,
-                params_dtype=config.torch_dtype,
+                params_dtype=config.dtype,
                 normalization="RMSNorm" if config.rms_norm else "LayerNorm",
                 init_method=lambda x: torch.nn.init.uniform_(
                     x, -self.config.decoder_init_range, self.config.decoder_init_range
@@ -265,7 +285,7 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):

         else:
             self.decoder = transformer_engine.pytorch.Linear(
-                config.hidden_size, config.vocab_size, params_dtype=config.torch_dtype
+                config.hidden_size, config.vocab_size, params_dtype=config.dtype
             )

     def forward(
@@ -275,7 +295,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
         output_hidden_states=False,
         output_attentions=False,
         labels=None,
-        **kwargs,
     ) -> MaskedLMOutput:
         """Forward pass of the AMPLIFYForMaskedLM model.

@@ -285,7 +304,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
             output_hidden_states (bool): Whether to output the hidden states.
             output_attentions (bool): Whether to output the attention weights.
             labels (torch.Tensor): The labels.
-            **kwargs: Additional arguments.

         Returns:
             MaskedLMOutput: The output of the model.
@@ -296,7 +314,6 @@ class AMPLIFYForMaskedLM(AMPLIFYPreTrainedModel):
             output_hidden_states,
             output_attentions,
             labels,
-            **kwargs,
         )

         # Classification head with layer norm
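Besides renaming config.torch_dtype to config.dtype throughout, this file gains an else branch so that intermediate_size falls back to config.intermediate_size when the gated-FFN rescaling above it does not apply. A standalone sketch of that sizing rule follows; the gated_ffn flag and the multiple_of default are illustrative assumptions, not values read from this file.

    def ffn_intermediate_size(intermediate_size: int, gated_ffn: bool, multiple_of: int = 256) -> int:
        if gated_ffn:
            # Gated (SwiGLU-style) FFNs use 2/3 of the configured width to keep the
            # parameter count comparable, rounded up to a hardware-friendly multiple.
            size = int(2 * intermediate_size / 3)
            return multiple_of * ((size + multiple_of - 1) // multiple_of)
        # Fallback added in this diff: keep the configured width unchanged.
        return intermediate_size

    print(ffn_intermediate_size(2560, gated_ffn=True))   # 1792
    print(ffn_intermediate_size(2560, gated_ffn=False))  # 2560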
config.json CHANGED
@@ -12,6 +12,7 @@
     "bos_token_id": 3,
     "decoder_init_range": 0.02,
     "dropout_prob": 0,
+    "dtype": "float32",
     "embedding_init_range": 0.02,
     "eos_token_id": 4,
     "ffn_bias": false,
@@ -31,8 +32,7 @@
     "padded_vocab_size": 32,
     "pre_activation_layer_norm": true,
     "rms_norm": true,
-    "torch_dtype": "float32",
-    "transformers_version": "4.53.2",
+    "transformers_version": "4.56.1",
     "unk_token_id": 1,
     "vocab_path": "conf/tokenizer/amplify_vocab.txt",
     "vocab_size": 27
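The config change mirrors the code: a "dtype" entry is added, the old "torch_dtype" entry is dropped, and the recorded transformers_version moves from 4.53.2 to 4.56.1. A quick sanity check after loading, reusing the placeholder repo id from the README sketch above:

    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("some-org/AMPLIFY_TE", trust_remote_code=True)  # placeholder repo id
    print(config.dtype)  # expected to resolve to float32, per this config.json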