thon | |
from transformers import BertConfig, ViTConfig, VisionEncoderDecoderConfig, VisionEncoderDecoderModel

# Build the encoder (ViT) and decoder (BERT) configurations with their default arguments.
config_encoder = ViTConfig()
config_decoder = BertConfig()

# Combine both configurations into a single encoder-decoder configuration.
config = VisionEncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)

# Instantiate the model from the configuration alone; no pretrained checkpoint is loaded here.
model = VisionEncoderDecoderModel(config=config)
Initialising VisionEncoderDecoderModel from a pretrained encoder and a pretrained decoder.