Update configuration_codeshell.py
configuration_codeshell.py  (+17 -4)  CHANGED
@@ -17,7 +17,6 @@
 # its original forms to accommodate minor architectural differences compared to
 # GPTBigCode Configuration that trained the model.

-# coding=utf-8
 # Copyright 2023 The BigCode team and HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -31,7 +30,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
+""" Shell configuration"""

 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
@@ -51,7 +50,7 @@ class CodeShellConfig(PretrainedConfig):
     Args:
         vocab_size (`int`, *optional*, defaults to 50257):
            Vocabulary size of the GPT-2 model. Defines the number of different tokens that can be represented by the
-           `inputs_ids` passed when calling [`
+           `inputs_ids` passed when calling [`ShellModel`].
        n_positions (`int`, *optional*, defaults to 1024):
            The maximum sequence length that this model might ever be used with. Typically set this to something large
            just in case (e.g., 512 or 1024 or 2048).
@@ -86,7 +85,21 @@ class CodeShellConfig(PretrainedConfig):
            Whether to scale the attention softmax in float32.
        attention_type (`bool`, *optional*, defaults to `True`):
            Whether to use Multi-Query Attion (`True`) or Multi-Head Attention (`False`).
-
+    Example:
+
+    ```python
+    >>> from configuration_codeshell import CodeShellConfig
+    >>> from modeling_codeshell import CodeShellForCausalLM
+
+    >>> # Initializing a CodeShell configuration
+    >>> configuration = CodeShellConfig()
+
+    >>> # Initializing a model (with random weights) from the configuration
+    >>> model = CodeShellForCausalLM(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""

     model_type = "codeshell"
     keys_to_ignore_at_inference = ["past_key_values"]
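The docstring added here documents the knobs that matter most when instantiating the config (`vocab_size`, `n_positions`, `attention_type`). The sketch below shows how those fields might be combined with the standard `PretrainedConfig` save/load round trip; it is illustrative only, and assumes the flat module layout from the docstring example (`configuration_codeshell.py` and `modeling_codeshell.py` importable from the working directory) and keyword names exactly as this docstring spells them.

```python
# Sketch only: assumes configuration_codeshell.py / modeling_codeshell.py are on the
# import path and that the keyword names match the docstring added in this commit
# (vocab_size, n_positions, attention_type).
from configuration_codeshell import CodeShellConfig
from modeling_codeshell import CodeShellForCausalLM

# Build a configuration; fields left out keep their documented defaults
# (e.g. vocab_size, documented above as defaulting to 50257).
configuration = CodeShellConfig(
    n_positions=2048,     # maximum sequence length the model may see
    attention_type=True,  # per the docstring: True = Multi-Query, False = Multi-Head Attention
)

# Instantiate a model with random weights from the configuration,
# as in the docstring example.
model = CodeShellForCausalLM(configuration)

# CodeShellConfig inherits from PretrainedConfig, so it round-trips
# through the usual save/load API.
configuration.save_pretrained("./codeshell-config")
reloaded = CodeShellConfig.from_pretrained("./codeshell-config")
assert reloaded.n_positions == 2048
```

None of the changes in this commit touch serialized configuration fields: it removes the stray `# coding=utf-8` line, names the module docstring, completes the `[`ShellModel`]` cross-reference, and adds the usage example shown above.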