Taizo Kaneko
committed on
Commit
·
3ba50ba
1
Parent(s):
912b503
commit files to HF hub
Browse files
- config.json +2 -1
- fasttext_jp_embedding.py +1 -1
- fasttext_jp_tokenizer.py +2 -0
config.json
CHANGED
|
@@ -7,7 +7,8 @@
|
|
| 7 |
"AutoModel": "fasttext_jp_embedding.FastTextJpModel"
|
| 8 |
},
|
| 9 |
"hidden_size": 300,
|
| 10 |
-
"model_type": "
|
|
|
|
| 11 |
"torch_dtype": "float32",
|
| 12 |
"transformers_version": "4.23.1",
|
| 13 |
"vocab_size": 2000000
|
|
|
|
| 7 |
"AutoModel": "fasttext_jp_embedding.FastTextJpModel"
|
| 8 |
},
|
| 9 |
"hidden_size": 300,
|
| 10 |
+
"model_type": "fasttext_jp",
|
| 11 |
+
"tokenizer_class": "FastTextJpTokenizer",
|
| 12 |
"torch_dtype": "float32",
|
| 13 |
"transformers_version": "4.23.1",
|
| 14 |
"vocab_size": 2000000
|
fasttext_jp_embedding.py
CHANGED
|
@@ -6,7 +6,7 @@ import torch
|
|
| 6 |
|
| 7 |
|
| 8 |
class FastTextJpConfig(PretrainedConfig):
|
| 9 |
-
model_type = "
|
| 10 |
|
| 11 |
def __init__(self, **kwargs):
|
| 12 |
super().__init__(**kwargs)
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
class FastTextJpConfig(PretrainedConfig):
|
| 9 |
+
model_type = "fasttext_jp"
|
| 10 |
|
| 11 |
def __init__(self, **kwargs):
|
| 12 |
super().__init__(**kwargs)
|
fasttext_jp_tokenizer.py
CHANGED
|
@@ -28,6 +28,8 @@ def load_stoi(vocab_file: str) -> dict[str, int]:
|
|
| 28 |
|
| 29 |
|
| 30 |
class FastTextJpTokenizer(MeCabTokenizer):
|
|
|
|
|
|
|
| 31 |
vocab_files_names = VOCAB_FILES_NAMES
|
| 32 |
|
| 33 |
def __init__(self,
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
class FastTextJpTokenizer(MeCabTokenizer):
|
| 31 |
+
model_type = "fasttext_jp"
|
| 32 |
+
|
| 33 |
vocab_files_names = VOCAB_FILES_NAMES
|
| 34 |
|
| 35 |
def __init__(self,
|