KenyaNonaka0210 committed
Commit 5b5f554 · 1 Parent(s): 28698b8

rename tokenizer

tokenization_luke_bert_japanese.py → tokenization_ubke_bert_japanese.py RENAMED
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tokenization classes for LUKE."""
+"""Tokenization classes for UBKE."""
 
 import collections
 import copy
@@ -57,7 +57,7 @@ logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "entity_vocab_file": "entity_vocab.json", "spm_file": "spiece.model"}
 
 
-class LukeBertJapaneseTokenizer(PreTrainedTokenizer):
+class UbkeBertJapaneseTokenizer(PreTrainedTokenizer):
     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask", "position_ids"]
 
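Because the module file itself is renamed, any code that imported the old path must switch to the new one. A minimal sketch of loading the renamed class directly, assuming a local checkout of this repo at the hypothetical path ./ubke-bert-japanese containing the vocab.txt, entity_vocab.json and spiece.model files listed in VOCAB_FILES_NAMES, plus the usual Japanese tokenization dependencies (e.g. fugashi for MeCab):

# renamed module and class from this commit
from tokenization_ubke_bert_japanese import UbkeBertJapaneseTokenizer

# from_pretrained is inherited from the PreTrainedTokenizer base class;
# "./ubke-bert-japanese" is a hypothetical local path, not taken from this commit.
tokenizer = UbkeBertJapaneseTokenizer.from_pretrained("./ubke-bert-japanese")
print(tokenizer.model_input_names)  # ["input_ids", "attention_mask", "position_ids"]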
tokenizer_config.json CHANGED
@@ -69,7 +69,7 @@
   ],
   "auto_map": {
     "AutoTokenizer": [
-      "tokenization_luke_bert_japanese.LukeBertJapaneseTokenizer",
+      "tokenization_ubke_bert_japanese.UbkeBertJapaneseTokenizer",
       null
     ]
   },
@@ -99,7 +99,7 @@
   "subword_tokenizer_type": "wordpiece",
   "sudachi_kwargs": null,
   "task": null,
-  "tokenizer_class": "LukeBertJapaneseTokenizer",
+  "tokenizer_class": "UbkeBertJapaneseTokenizer",
   "unk_token": "[UNK]",
   "word_tokenizer_type": "mecab"
-}
+}
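The auto_map update points AutoTokenizer at the renamed class inside the repo's own module, so loading it from the Hub still requires trust_remote_code. A minimal sketch, assuming the hypothetical repo id "KenyaNonaka0210/ubke-bert-japanese":

from transformers import AutoTokenizer

# trust_remote_code=True is needed because UbkeBertJapaneseTokenizer is defined in
# the repo's tokenization_ubke_bert_japanese.py, not inside transformers itself;
# the repo id below is a placeholder, not taken from this commit.
tokenizer = AutoTokenizer.from_pretrained(
    "KenyaNonaka0210/ubke-bert-japanese",
    trust_remote_code=True,
)
print(type(tokenizer).__name__)  # expected: UbkeBertJapaneseTokenizer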