Commit 5b5f554 · rename tokenizer
Parent: 28698b8
tokenization_luke_bert_japanese.py → tokenization_ubke_bert_japanese.py  RENAMED

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tokenization classes for LUKE."""
+"""Tokenization classes for UBKE."""

 import collections
 import copy
@@ -57,7 +57,7 @@ logger = logging.get_logger(__name__)
 VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt", "entity_vocab_file": "entity_vocab.json", "spm_file": "spiece.model"}


-class LukeBertJapaneseTokenizer(PreTrainedTokenizer):
+class UbkeBertJapaneseTokenizer(PreTrainedTokenizer):
     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask", "position_ids"]
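Because both the module and the class were renamed, any code that imports the tokenizer directly must switch to the new names. A minimal usage sketch (not part of the commit; the checkpoint path is a placeholder):

```python
# Minimal sketch: direct use of the renamed class. "path/to/checkpoint"
# is a placeholder, not taken from this commit.
from tokenization_ubke_bert_japanese import UbkeBertJapaneseTokenizer

tokenizer = UbkeBertJapaneseTokenizer.from_pretrained("path/to/checkpoint")

# model_input_names, declared on the class above, names the inputs this
# tokenizer is expected to produce for the model.
print(tokenizer.model_input_names)  # ["input_ids", "attention_mask", "position_ids"]
```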
tokenizer_config.json  CHANGED

@@ -69,7 +69,7 @@
   ],
   "auto_map": {
     "AutoTokenizer": [
-      "tokenization_luke_bert_japanese.LukeBertJapaneseTokenizer",
+      "tokenization_ubke_bert_japanese.UbkeBertJapaneseTokenizer",
       null
     ]
   },
@@ -99,7 +99,7 @@
   "subword_tokenizer_type": "wordpiece",
   "sudachi_kwargs": null,
   "task": null,
-  "tokenizer_class": "LukeBertJapaneseTokenizer",
+  "tokenizer_class": "UbkeBertJapaneseTokenizer",
   "unk_token": "[UNK]",
   "word_tokenizer_type": "mecab"
-}
+}
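The `auto_map` entry is what lets `AutoTokenizer` resolve the renamed class from the repository itself (the trailing `null` means no fast tokenizer is registered). A minimal loading sketch, assuming a placeholder repo id:

```python
# Minimal sketch of loading through the updated auto_map. "org/ubke-bert-japanese"
# is a placeholder repo id. trust_remote_code=True is required because
# UbkeBertJapaneseTokenizer lives in the repo's tokenization_ubke_bert_japanese.py
# rather than in the transformers library.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("org/ubke-bert-japanese", trust_remote_code=True)
```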