Pratik committed on
Commit
112bda6
·
1 Parent(s): 9412fcc

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"'": 0, "ઁ": 2, "ં": 3, "ઃ": 4, "અ": 5, "આ": 6, "ઇ": 7, "ઈ": 8, "ઉ": 9, "ઊ": 10, "ઋ": 11, "ઍ": 12, "એ": 13, "ઐ": 14, "ઑ": 15, "ઓ": 16, "ઔ": 17, "ક": 18, "ખ": 19, "ગ": 20, "ઘ": 21, "ચ": 22, "છ": 23, "જ": 24, "ઝ": 25, "ઞ": 26, "ટ": 27, "ઠ": 28, "ડ": 29, "ઢ": 30, "ણ": 31, "ત": 32, "થ": 33, "દ": 34, "ધ": 35, "ન": 36, "પ": 37, "ફ": 38, "બ": 39, "ભ": 40, "મ": 41, "ય": 42, "ર": 43, "લ": 44, "ળ": 45, "વ": 46, "શ": 47, "ષ": 48, "સ": 49, "હ": 50, "઼": 51, "ા": 52, "િ": 53, "ી": 54, "ુ": 55, "ૂ": 56, "ૃ": 57, "ૅ": 58, "ે": 59, "ૈ": 60, "ૉ": 61, "ો": 62, "ૌ": 63, "્": 64, "ૠ": 65, "ૢ": 66, "૦": 67, "૧": 68, "૨": 69, "૩": 70, "૪": 71, "૫": 72, "૬": 73, "૭": 74, "૮": 75, "૯": 76, "‘": 77, "’": 78, "|": 1, "[UNK]": 79, "[PAD]": 80}