MagedSaeed committed on
Commit
9946b07
·
verified ·
1 Parent(s): 8d0ce5f

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_script.py +7 -5
tokenizer_script.py CHANGED
@@ -113,14 +113,16 @@ class CharacterTokenizer(PreTrainedTokenizer):
113
 
114
  @classmethod
115
  def from_json(cls, vocab_file, **kwargs):
116
- with open(vocab_file, 'r', encoding='utf-8') as f:
117
- vocab = json.load(f)
118
-
119
- return cls(vocab=vocab, **kwargs)
 
120
 
121
  @classmethod
122
  def from_vocab(cls, vocab, **kwargs):
123
- return cls(vocab=vocab, **kwargs)
 
124
 
125
  @classmethod
126
  def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
 
113
 
114
  @classmethod
115
  def from_json(cls, vocab_file, **kwargs):
116
+ print('vocab file is:',vocab_file)
117
+ with open(vocab_file, 'r', encoding='utf-8') as f:
118
+ vocab = json.load(f)
119
+
120
+ return cls(vocab=vocab, **kwargs)
121
 
122
  @classmethod
123
  def from_vocab(cls, vocab, **kwargs):
124
+ print('vocab are:',vocab)
125
+ return cls(vocab=vocab, **kwargs)
126
 
127
  @classmethod
128
  def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):