amaanbadure committed
Commit 6224e0e · 1 parent: d38a943

Upload config

Files changed (2):
  1. config.json +31 -0
  2. configuration_sp.py +68 -0
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "activation_function": "gelu_new",
+   "attn_pdrop": 0.1,
+   "auto_map": {
+     "AutoConfig": "configuration_sp.SPConfig"
+   },
+   "bos_token_id": 2,
+   "dropout": 0.2,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 3,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2_sentencepiece",
+   "n_embd": 384,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "transformers_version": "4.35.2",
+   "use_cache": true,
+   "vocab_size": 1000
+ }
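
The auto_map entry above points AutoConfig at the custom SPConfig class shipped in configuration_sp.py, so the config can be loaded straight from the Hub once remote code execution is allowed. A minimal sketch, assuming the files live in a Hub repository; the repo id used below is a placeholder, not the actual repository name:

# Sketch: loading this config through the auto_map entry.
# "amaanbadure/example-repo" is a placeholder repo id, not the real one.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "amaanbadure/example-repo",
    trust_remote_code=True,  # needed so auto_map can resolve configuration_sp.SPConfig
)
print(type(config).__name__)                          # SPConfig
print(config.model_type)                              # gpt2_sentencepiece
print(config.n_embd, config.n_layer, config.n_head)   # 384 4 4
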
configuration_sp.py ADDED
@@ -0,0 +1,68 @@
+ from transformers import PretrainedConfig
+
+ # Copied from the GPT-2 configuration class (GPT2Config) in transformers.
+
+ class SPConfig(PretrainedConfig):
+
+     model_type = "gpt2_sentencepiece"
+     keys_to_ignore_at_inference = ["past_key_values"]
+     # Map the generic transformers attribute names onto the GPT-2-style ones.
+     attribute_map = {
+         "hidden_size": "n_embd",
+         "max_position_embeddings": "n_positions",
+         "num_attention_heads": "n_head",
+         "num_hidden_layers": "n_layer",
+     }
+
+     def __init__(
+         self,
+         vocab_size=1000,
+         n_positions=1024,
+         n_embd=768,
+         n_layer=12,
+         n_head=12,
+         n_inner=None,
+         activation_function="gelu_new",
+         resid_pdrop=0.1,
+         embd_pdrop=0.1,
+         attn_pdrop=0.1,
+         layer_norm_epsilon=1e-5,
+         initializer_range=0.02,
+         summary_type="cls_index",
+         summary_use_proj=True,
+         summary_activation=None,
+         summary_proj_to_labels=True,
+         summary_first_dropout=0.1,
+         scale_attn_weights=True,
+         use_cache=True,
+         bos_token_id=2,
+         eos_token_id=3,
+         scale_attn_by_inverse_layer_idx=False,
+         reorder_and_upcast_attn=False,
+         **kwargs,
+     ):
+         self.vocab_size = vocab_size
+         self.n_positions = n_positions
+         self.n_embd = n_embd
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.n_inner = n_inner
+         self.activation_function = activation_function
+         self.resid_pdrop = resid_pdrop
+         self.embd_pdrop = embd_pdrop
+         self.attn_pdrop = attn_pdrop
+         self.layer_norm_epsilon = layer_norm_epsilon
+         self.initializer_range = initializer_range
+         self.summary_type = summary_type
+         self.summary_use_proj = summary_use_proj
+         self.summary_activation = summary_activation
+         self.summary_first_dropout = summary_first_dropout
+         self.summary_proj_to_labels = summary_proj_to_labels
+         self.scale_attn_weights = scale_attn_weights
+         self.use_cache = use_cache
+         self.scale_attn_by_inverse_layer_idx = scale_attn_by_inverse_layer_idx
+         self.reorder_and_upcast_attn = reorder_and_upcast_attn
+
+         self.bos_token_id = bos_token_id
+         self.eos_token_id = eos_token_id
+
+         # Forward the special token ids and any remaining kwargs to PretrainedConfig.
+         super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
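
For local experimentation the class can also be used without going through the Hub. A minimal sketch, assuming configuration_sp.py sits in the working directory; the hyperparameter values mirror config.json above, and the output directory name is arbitrary:

# Sketch: building the config locally with the values from config.json above.
from transformers import AutoConfig
from configuration_sp import SPConfig

config = SPConfig(
    vocab_size=1000,
    n_positions=1024,
    n_embd=384,
    n_layer=4,
    n_head=4,
    dropout=0.2,      # extra key in config.json; stored via **kwargs
    bos_token_id=2,
    eos_token_id=3,
)

# Optional: register the custom model_type so AutoConfig can resolve it
# locally without trust_remote_code.
AutoConfig.register("gpt2_sentencepiece", SPConfig)

print(config.hidden_size)             # 384, mapped to n_embd through attribute_map
config.save_pretrained("./sp-model")  # serializes the config back to config.json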