# configs.py — model configuration classes.
# (Hugging Face upload metadata removed: "Upload 28 files", commit e83e49f.)
from constants import *
class GPT2Config:
    """Hyperparameter container for a GPT-2 style decoder.

    NOTE(review): despite its name, ``vocab_size_or_config_json_file`` is
    used directly as an integer vocabulary size here — no JSON loading
    happens in this class; confirm callers never pass a file path.
    """

    def __init__(self, vocab_size_or_config_json_file=50257, n_positions=MAX_LENGTH, n_ctx=MAX_LENGTH, n_embd=768, n_layer=12, n_head=12, layer_norm_epsilon=1e-05, initializer_range=0.02):
        # Vocabulary / sequence geometry.
        self.vocab_size = vocab_size_or_config_json_file
        self.n_positions = n_positions
        self.n_ctx = n_ctx
        # Transformer stack geometry.
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        # Numerical / initialization settings.
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range

    @classmethod
    def from_dict(cls, config_dict):
        """Alternate constructor: expand a plain dict into keyword args."""
        return cls(**config_dict)
class MBartConfig:
    """Configuration for an MBart-style encoder-decoder model.

    Encoder and decoder share the same layer count (``num_layers``) and
    attention-head count (``num_heads``); the feed-forward width is fixed
    at ``4 * d_model``. All other hyperparameters use fixed defaults.
    """

    def __init__(self, vocab_size, d_model, num_layers, num_heads, pad_token_id, eos_token_id):
        self.vocab_size = vocab_size
        self.d_model = d_model
        # Symmetric encoder/decoder stack.
        self.encoder_layers = num_layers
        self.decoder_layers = num_layers
        self.encoder_attention_heads = num_heads
        self.decoder_attention_heads = num_heads
        # Conventional transformer FFN width: 4x the model dimension.
        self.encoder_ffn_dim = d_model * 4
        self.decoder_ffn_dim = d_model * 4
        # Fixed regularisation / initialisation defaults.
        self.dropout = 0.1
        self.attention_dropout = 0.0
        self.activation_dropout = 0.0
        self.max_position_embeddings = 1024
        self.init_std = 0.02
        self.layer_norm_eps = 1e-5
        # Special token ids (bos and decoder start are fixed by convention here).
        self.pad_token_id = pad_token_id
        self.eos_token_id = eos_token_id
        self.bos_token_id = 0
        self.decoder_start_token_id = 2
        # Generation / runtime flags.
        self.output_past = True
        self.scale_embedding = True
        self.use_cache = True
        # Alias kept for code that expects a BERT-style attribute name.
        self.num_hidden_layers = num_layers

    @classmethod
    def from_dict(cls, config_dict):
        """Build a config from a plain dict (consistent with the sibling configs)."""
        return cls(**config_dict)
class CodeGenConfig:
    """Configuration for a CodeGen-style causal language model.

    Only the core geometry is caller-supplied; context length, dropout
    rates and special-token ids are fixed defaults.
    """

    def __init__(self, vocab_size, n_embd, n_layer, n_head):
        # Caller-supplied model geometry.
        self.vocab_size = vocab_size
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        # Fixed context window.
        self.n_positions = 2048
        self.n_ctx = 2048
        # Fixed dropout rates.
        self.resid_pdrop = 0.1
        self.embd_pdrop = 0.1
        self.attn_pdrop = 0.1
        self.activation_function = "gelu_new"
        # GPT-2 style end-of-text token doubles as padding.
        self.pad_token_id = 50256
        self.eos_token_id = 50256
        self.initializer_range = 0.02

    @classmethod
    def from_dict(cls, config_dict):
        """Build a config from a plain dict (consistent with the sibling configs)."""
        return cls(**config_dict)
class SummarizationConfig:
    """BART-large-style defaults for summarization.

    Generalized (backward-compatibly): keyword overrides may now be
    supplied, e.g. ``SummarizationConfig(num_beams=6)``; calling with no
    arguments produces exactly the original fixed configuration.
    """

    def __init__(self, **overrides):
        # Model geometry.
        self.vocab_size = 50265
        self.max_position_embeddings = 1024
        self.encoder_layers = 12
        self.encoder_ffn_dim = 4096
        self.encoder_attention_heads = 16
        self.decoder_layers = 12
        self.decoder_ffn_dim = 4096
        self.decoder_attention_heads = 16
        self.d_model = 1024
        # Regularisation.
        self.encoder_layerdrop = 0.0
        self.decoder_layerdrop = 0.0
        self.activation_function = "gelu"
        self.dropout = 0.1
        self.attention_dropout = 0.0
        self.activation_dropout = 0.0
        self.init_std = 0.02
        self.classifier_dropout = 0.0
        self.num_labels = 3
        # Special token ids.
        self.pad_token_id = 1
        self.bos_token_id = 0
        self.eos_token_id = 2
        self.layer_norm_eps = 1e-05
        # Generation settings.
        self.num_beams = 4
        self.early_stopping = True
        self.max_length = 100
        self.min_length = 30
        self.scale_embedding = False
        # Apply caller overrides last so they win over the defaults.
        for key, value in overrides.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        """Build a config from a plain dict of overrides."""
        return cls(**config_dict)
class Clip4ClipConfig:
    """Hyperparameters for a CLIP4Clip-style cross-modal transformer.

    Unknown keyword arguments are attached to the instance verbatim.

    NOTE(review): ``all_head_size`` is ``num_attention_heads * hidden_size``
    rather than the more usual ``hidden_size`` — confirm against the
    attention module that consumes it before changing.
    """

    def __init__(self, vocab_size=30522, hidden_size=512, num_hidden_layers=6, num_attention_heads=8, intermediate_size=2048, hidden_act="gelu", hidden_dropout_prob=0.0, attention_probs_dropout_prob=0.0, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, layer_norm_eps=1e-12, pad_token_id=0, bos_token_id=1, eos_token_id=2, **kwargs):
        # Store every named hyperparameter under its own name.
        vars(self).update(
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            hidden_dropout_prob=hidden_dropout_prob,
            attention_probs_dropout_prob=attention_probs_dropout_prob,
            max_position_embeddings=max_position_embeddings,
            type_vocab_size=type_vocab_size,
            initializer_range=initializer_range,
            layer_norm_eps=layer_norm_eps,
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
        )
        # Derived attention sizes.
        self.all_head_size = self.num_attention_heads * self.hidden_size
        self.attention_head_size = int(self.hidden_size / self.num_attention_heads)
        # Pass-through extras.
        vars(self).update(kwargs)

    @classmethod
    def from_dict(cls, config_dict):
        """Alternate constructor: expand a plain dict into keyword args."""
        return cls(**config_dict)
class MusicGenConfig:
    """Hyperparameters for a MusicGen-style transformer.

    Unknown keyword arguments are attached to the instance verbatim.
    ``all_head_size`` is derived as ``num_attention_heads * hidden_size``,
    mirroring the other config classes in this module.
    """

    def __init__(self, vocab_size=2048, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, layer_norm_eps=1e-05, initializer_range=0.02, pad_token_id=0, bos_token_id=1, eos_token_id=2, n_positions=2048, n_ctx=2048, **kwargs):
        # Assign each named hyperparameter under its own attribute name.
        named = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("intermediate_size", intermediate_size),
            ("hidden_act", hidden_act),
            ("hidden_dropout_prob", hidden_dropout_prob),
            ("attention_probs_dropout_prob", attention_probs_dropout_prob),
            ("layer_norm_eps", layer_norm_eps),
            ("initializer_range", initializer_range),
            ("pad_token_id", pad_token_id),
            ("bos_token_id", bos_token_id),
            ("eos_token_id", eos_token_id),
            ("n_positions", n_positions),
            ("n_ctx", n_ctx),
        )
        for attr, value in named:
            setattr(self, attr, value)
        # Derived attention width.
        self.all_head_size = self.num_attention_heads * self.hidden_size
        # Pass-through extras.
        vars(self).update(kwargs)

    @classmethod
    def from_dict(cls, config_dict):
        """Alternate constructor: expand a plain dict into keyword args."""
        return cls(**config_dict)
class BartConfig:
    """Configuration for a BART encoder-decoder model.

    Every constructor argument is stored under the same attribute name;
    unknown keyword arguments are attached to the instance verbatim.
    """

    def __init__(self, vocab_size=50265, max_position_embeddings=1024, encoder_layers=12, encoder_ffn_dim=4096, encoder_attention_heads=16, decoder_layers=12, decoder_ffn_dim=4096, decoder_attention_heads=16, encoder_layerdrop=0.0, decoder_layerdrop=0.0, activation_function="gelu", d_model=1024, dropout=0.1, attention_dropout=0.0, activation_dropout=0.0, init_std=0.02, classifier_dropout=0.0, num_labels=3, pad_token_id=1, bos_token_id=0, eos_token_id=2, layer_norm_eps=1e-05, num_beams=4, early_stopping=True, max_length=100, min_length=30, scale_embedding=False, **kwargs):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.encoder_layers = encoder_layers
        self.encoder_ffn_dim = encoder_ffn_dim
        self.encoder_attention_heads = encoder_attention_heads
        self.decoder_layers = decoder_layers
        self.decoder_ffn_dim = decoder_ffn_dim
        self.decoder_attention_heads = decoder_attention_heads
        self.encoder_layerdrop = encoder_layerdrop
        self.decoder_layerdrop = decoder_layerdrop
        self.activation_function = activation_function
        self.d_model = d_model
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.activation_dropout = activation_dropout
        self.init_std = init_std
        self.classifier_dropout = classifier_dropout
        self.num_labels = num_labels
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.layer_norm_eps = layer_norm_eps
        self.num_beams = num_beams
        # BUG FIX: these were hard-coded to True/False, silently ignoring
        # the caller-supplied `early_stopping` and `scale_embedding` args.
        self.early_stopping = early_stopping
        self.max_length = max_length
        self.min_length = min_length
        self.scale_embedding = scale_embedding
        # Pass-through extras.
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        """Build a config from a plain dict."""
        return cls(**config_dict)
class OpenLRMConfig:
    """Hyperparameters for an OpenLRM-style model.

    Unknown keyword arguments are attached to the instance verbatim;
    ``all_head_size`` is derived as ``num_heads * hidden_dim``.
    """

    def __init__(self, obj_dim=1024, hidden_dim=512, num_layers=6, num_heads=8, dropout_prob=0.1, **kwargs):
        # Named hyperparameters.
        self.obj_dim = obj_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.dropout_prob = dropout_prob
        # Derived attention width.
        self.all_head_size = self.num_heads * self.hidden_dim
        # Pass-through extras.
        vars(self).update(kwargs)

    @classmethod
    def from_dict(cls, config_dict):
        """Alternate constructor: expand a plain dict into keyword args."""
        return cls(**config_dict)
class UNet2DConditionModelConfig:
    """Configuration for a Stable-Diffusion-style conditional UNet.

    Unknown keyword arguments are attached to the instance verbatim.

    BUG FIX: the list defaults (``block_out_channels``, ``downsample``,
    ``upsample``) were mutable default arguments shared across every
    instance — mutating one config's list silently changed all others.
    They are now ``None`` sentinels expanded to a fresh list per instance.
    """

    def __init__(self, sample_size=64, layers_per_block=2, block_out_channels=None, downsample=None, upsample=None, cross_attention_dim=768, act_fn="silu", norm_num_groups=32, num_attention_heads=8, in_channels=4, out_channels=4, attention_head_dim=64, **kwargs):
        self.sample_size = sample_size
        self.layers_per_block = layers_per_block
        # Fresh list per instance when the caller did not supply one.
        self.block_out_channels = [320, 640, 1280, 1280] if block_out_channels is None else block_out_channels
        self.downsample = [2, 2, 2, 2] if downsample is None else downsample
        self.upsample = [2, 2, 2, 2] if upsample is None else upsample
        self.cross_attention_dim = cross_attention_dim
        self.act_fn = act_fn
        self.norm_num_groups = norm_num_groups
        self.num_attention_heads = num_attention_heads
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.attention_head_dim = attention_head_dim
        # Pass-through extras.
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        """Build a config from a plain dict."""
        return cls(**config_dict)
class AutoencoderKLConfig:
    """Configuration for a KL-regularized VAE (Stable-Diffusion style).

    All hyperparameters have fixed defaults; keyword arguments override
    or extend them and are attached to the instance verbatim.
    """

    def __init__(self, **kwargs):
        # Built inside __init__ so the list values are fresh per instance.
        defaults = {
            "sample_size": 64,
            "latent_channels": 4,
            "layers_per_block": 2,
            "block_out_channels": [128, 256, 512, 512],
            "downsample": [2, 2, 2, 2],
            "upsample": [2, 2, 2, 2],
            "act_fn": "silu",
            "norm_num_groups": 32,
            "num_channels_every_n_layers": 2,
        }
        vars(self).update(defaults)
        # Caller-supplied values win over the defaults.
        vars(self).update(kwargs)

    @classmethod
    def from_dict(cls, config_dict):
        """Alternate constructor: expand a plain dict into keyword args."""
        return cls(**config_dict)