Update configuration_codeshell.py
configuration_codeshell.py  (+17 -4)  CHANGED
@@ -17,7 +17,6 @@
 # its original forms to accommodate minor architectural differences compared to
 # GPTBigCode Configuration that trained the model.

-# coding=utf-8
 # Copyright 2023 The BigCode team and HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,7 +30,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+""" Shell configuration"""

 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
@@ -51,7 +50,7 @@ class CodeShellConfig(PretrainedConfig):
     Args:
         vocab_size (`int`, *optional*, defaults to 50257):
            Vocabulary size of the GPT-2 model. Defines the number of different tokens that can be represented by the
-           `inputs_ids` passed when calling [`
+           `inputs_ids` passed when calling [`ShellModel`].
        n_positions (`int`, *optional*, defaults to 1024):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
@@ -86,7 +85,21 @@ class CodeShellConfig(PretrainedConfig):
            Whether to scale the attention softmax in float32.
        attention_type (`bool`, *optional*, defaults to `True`):
            Whether to use Multi-Query Attion (`True`) or Multi-Head Attention (`False`).
-
+    Example:
+
+    ```python
+    >>> from configuration_codeshell import CodeShellConfig
+    >>> from modeling_codeshell import CodeShellForCausalLM
+
+    >>> # Initializing a CodeShell configuration
+    >>> configuration = CodeShellConfig()
+
+    >>> # Initializing a model (with random weights) from the configuration
+    >>> model = CodeShellForCausalLM(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""

     model_type = "codeshell"
     keys_to_ignore_at_inference = ["past_key_values"]
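The docstring added here documents the knobs that matter most when instantiating the config (`vocab_size`, `n_positions`, `attention_type`). The sketch below shows how those fields might be combined with the standard `PretrainedConfig` save/load round trip; it is illustrative only, and assumes the flat module layout from the docstring example (`configuration_codeshell.py` and `modeling_codeshell.py` importable from the working directory) and keyword names exactly as this docstring spells them.

```python
# Sketch only: assumes configuration_codeshell.py / modeling_codeshell.py are on the
# import path and that the keyword names match the docstring added in this commit
# (vocab_size, n_positions, attention_type).
from configuration_codeshell import CodeShellConfig
from modeling_codeshell import CodeShellForCausalLM

# Build a configuration; fields left out keep their documented defaults
# (e.g. vocab_size, documented above as defaulting to 50257).
configuration = CodeShellConfig(
    n_positions=2048,     # maximum sequence length the model may see
    attention_type=True,  # per the docstring: True = Multi-Query, False = Multi-Head Attention
)

# Instantiate a model with random weights from the configuration,
# as in the docstring example.
model = CodeShellForCausalLM(configuration)

# CodeShellConfig inherits from PretrainedConfig, so it round-trips
# through the usual save/load API.
configuration.save_pretrained("./codeshell-config")
reloaded = CodeShellConfig.from_pretrained("./codeshell-config")
assert reloaded.n_positions == 2048
```

None of the changes in this commit touch serialized configuration fields: it removes the stray `# coding=utf-8` line, names the module docstring, completes the `[`ShellModel`]` cross-reference, and adds the usage example shown above.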