Feature Extraction
Transformers
PyTorch
English
Chinese
UniViTAR-1B / config.json
LimengQiao's picture
add: UniViTAR models
daefeac
{
"resolution_mode": "native",
"min_tokens": 256,
"max_tokens": 16384,
"patch_size": 14,
"resize_factor": 2,
"spatial_merge_size": 1,
"temporal_patch_size": 2,
"num_hidden_layers": 32,
"num_attention_heads": 24,
"hidden_size": 1920,
"intermediate_size": 7680,
"pe_type": "rope2d",
"norm_type": "RMSNorm",
"hidden_act": "SwiGLU",
"init_method": "xavier",
"image_mean": [0.485, 0.456, 0.406],
"image_std": [0.229, 0.224, 0.225]
}