label_ids.append(label[word_idx]) else: label_ids.append(-100) previous_word_idx = word_idx labels.append(label_ids) tokenized_inputs["labels"] = labels return tokenized_inputs To apply the preprocessing function over the entire dataset, use the 🤗 Datasets [`~datasets.Dataset.map`] function.