Upload tokenizer

- special_tokens_map.json: +35 -5
- tokenizer.json: +10 -1
- tokenizer_config.json: +5 -1
special_tokens_map.json CHANGED

@@ -1,7 +1,37 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
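
Each expanded entry is the serialized form of an AddedToken (the token string plus its matching flags) rather than a bare string. A minimal sketch of the corresponding object, assuming the tokenizers Python package is installed:

from tokenizers import AddedToken

# One entry from the map above, built as an AddedToken object.
cls_token = AddedToken(
    "[CLS]",
    single_word=False,  # may match even without word boundaries around it
    lstrip=False,       # keep whitespace to the left of the token
    rstrip=False,       # keep whitespace to the right of the token
    normalized=False,   # match against the raw input, before normalization
)
print(cls_token.content)  # [CLS]
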
tokenizer.json CHANGED

@@ -6,7 +6,16 @@
     "strategy": "LongestFirst",
     "stride": 0
   },
-  "padding": null,
+  "padding": {
+    "strategy": {
+      "Fixed": 512
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "[PAD]"
+  },
   "added_tokens": [
     {
       "id": 0,
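
The new block turns on fixed-length padding to 512 tokens. A sketch of how an equivalent block can be produced with the tokenizers library, assuming a local copy of this tokenizer.json (the file path is illustrative):

from tokenizers import Tokenizer

tokenizer = Tokenizer.from_file("tokenizer.json")

# Fixed-length padding: serialized as "strategy": {"Fixed": 512} above.
tokenizer.enable_padding(
    direction="right",   # stored as "direction": "Right"
    pad_id=0,
    pad_type_id=0,
    pad_token="[PAD]",
    length=512,
)
tokenizer.save("tokenizer.json")

# Every encoding now comes back padded (and, per the truncation block, truncated) to 512 ids.
print(len(tokenizer.encode("hello world").ids))  # 512
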
tokenizer_config.json CHANGED

@@ -41,15 +41,19 @@
       "special": true
     }
   },
-  "clean_up_tokenization_spaces":
+  "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }
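
The added keys persist the call-time padding and truncation settings: a max length of 512, longest-first truncation on the right, and no stride. A sketch of loading the uploaded tokenizer and reproducing those settings, assuming the transformers library; the repo id is a placeholder for wherever this commit lives:

from transformers import AutoTokenizer

# Placeholder repo id; substitute the repository this commit belongs to.
tokenizer = AutoTokenizer.from_pretrained("your-username/your-model")

batch = tokenizer(
    ["a short example", "another, slightly longer example"],
    padding="max_length",        # pad out to max_length, as tokenizer.json now does
    truncation="longest_first",  # matches "truncation_strategy"
    max_length=512,              # matches "max_length" / "model_max_length"
)
print(len(batch["input_ids"][0]))  # 512
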