Feature Extraction
Transformers
PyTorch
English
Chinese
UniViTAR-0.6B / config.json
LimengQiao's picture
add: UniViTAR models
e3e52c4
{
"resolution_mode": "native",
"min_tokens": 256,
"max_tokens": 16384,
"patch_size": 14,
"resize_factor": 2,
"spatial_merge_size": 1,
"temporal_patch_size": 2,
"num_hidden_layers": 32,
"num_attention_heads": 16,
"hidden_size": 1280,
"intermediate_size": 5184,
"pe_type": "rope2d",
"norm_type": "RMSNorm",
"hidden_act": "SwiGLU",
"init_method": "xavier",
"image_mean": [0.485, 0.456, 0.406],
"image_std": [0.229, 0.224, 0.225]
}