from constants import *

class GPT2Config:
    """Hyperparameters for a GPT-2 style decoder-only model."""

    def __init__(self, vocab_size_or_config_json_file=50257, n_positions=MAX_LENGTH,
                 n_ctx=MAX_LENGTH, n_embd=768, n_layer=12, n_head=12,
                 layer_norm_epsilon=1e-05, initializer_range=0.02):
        self.vocab_size = vocab_size_or_config_json_file
        self.n_ctx = n_ctx
        self.n_positions = n_positions
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

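# Usage sketch (illustrative values, not taken from the original code): from_dict builds a
# config from a plain dict, e.g. GPT2Config.from_dict({"n_layer": 6, "n_head": 8}).
# Keys that __init__ does not declare raise a TypeError, since GPT2Config takes no **kwargs.
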
class MBartConfig:
    """Hyperparameters for an mBART-style encoder-decoder; encoder and decoder share sizes."""

    def __init__(self, vocab_size, d_model, num_layers, num_heads, pad_token_id, eos_token_id):
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.encoder_layers = num_layers
        self.decoder_layers = num_layers
        self.encoder_attention_heads = num_heads
        self.decoder_attention_heads = num_heads
        self.encoder_ffn_dim = d_model * 4
        self.decoder_ffn_dim = d_model * 4
        self.dropout = 0.1
        self.attention_dropout = 0.0
        self.activation_dropout = 0.0
        self.max_position_embeddings = 1024
        self.init_std = 0.02
        self.layer_norm_eps = 1e-5
        self.pad_token_id = pad_token_id
        self.eos_token_id = eos_token_id
        self.bos_token_id = 0
        self.decoder_start_token_id = 2
        self.output_past = True
        self.scale_embedding = True
        self.use_cache = True
        self.num_hidden_layers = num_layers

class CodeGenConfig:
    """Hyperparameters for a CodeGen-style decoder-only model with fixed context and dropout settings."""

    def __init__(self, vocab_size, n_embd, n_layer, n_head):
        self.vocab_size = vocab_size
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_positions = 2048
        self.resid_pdrop = 0.1
        self.embd_pdrop = 0.1
        self.attn_pdrop = 0.1
        self.activation_function = "gelu_new"
        self.n_ctx = 2048
        self.pad_token_id = 50256
        self.eos_token_id = 50256
        self.initializer_range = 0.02

class SummarizationConfig:
    """Fixed encoder-decoder hyperparameters for summarization, including beam-search generation defaults."""

    def __init__(self):
        self.vocab_size = 50265
        self.max_position_embeddings = 1024
        self.encoder_layers = 12
        self.encoder_ffn_dim = 4096
        self.encoder_attention_heads = 16
        self.decoder_layers = 12
        self.decoder_ffn_dim = 4096
        self.decoder_attention_heads = 16
        self.encoder_layerdrop = 0.0
        self.decoder_layerdrop = 0.0
        self.activation_function = "gelu"
        self.d_model = 1024
        self.dropout = 0.1
        self.attention_dropout = 0.0
        self.activation_dropout = 0.0
        self.init_std = 0.02
        self.classifier_dropout = 0.0
        self.num_labels = 3
        self.pad_token_id = 1
        self.bos_token_id = 0
        self.eos_token_id = 2
        self.layer_norm_eps = 1e-05
        self.num_beams = 4
        self.early_stopping = True
        self.max_length = 100
        self.min_length = 30
        self.scale_embedding = False

class Clip4ClipConfig:
    """Hyperparameters for a CLIP4Clip-style text transformer."""

    def __init__(self, vocab_size=30522, hidden_size=512, num_hidden_layers=6, num_attention_heads=8,
                 intermediate_size=2048, hidden_act="gelu", hidden_dropout_prob=0.0,
                 attention_probs_dropout_prob=0.0, max_position_embeddings=512, type_vocab_size=2,
                 initializer_range=0.02, layer_norm_eps=1e-12, pad_token_id=0, bos_token_id=1,
                 eos_token_id=2, **kwargs):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        # Derived attention sizes
        self.all_head_size = self.num_attention_heads * self.hidden_size
        self.attention_head_size = int(self.hidden_size / self.num_attention_heads)
        # Any extra keyword arguments become attributes as-is
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

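# Worked check of the derived sizes above, with the defaults hidden_size=512 and
# num_attention_heads=8: attention_head_size = 512 // 8 = 64, and all_head_size as
# written is 8 * 512 = 4096 (num_attention_heads * hidden_size).
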
class MusicGenConfig:
    """Hyperparameters for a MusicGen-style decoder."""

    def __init__(self, vocab_size=2048, hidden_size=768, num_hidden_layers=12, num_attention_heads=12,
                 intermediate_size=3072, hidden_act="gelu", hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1, layer_norm_eps=1e-05, initializer_range=0.02,
                 pad_token_id=0, bos_token_id=1, eos_token_id=2, n_positions=2048, n_ctx=2048, **kwargs):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.layer_norm_eps = layer_norm_eps
        self.initializer_range = initializer_range
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.n_positions = n_positions
        self.n_ctx = n_ctx
        self.all_head_size = self.num_attention_heads * self.hidden_size
        # Any extra keyword arguments become attributes as-is
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

class BartConfig:
    """Hyperparameters for a BART-style encoder-decoder, including beam-search generation defaults."""

    def __init__(self, vocab_size=50265, max_position_embeddings=1024, encoder_layers=12,
                 encoder_ffn_dim=4096, encoder_attention_heads=16, decoder_layers=12,
                 decoder_ffn_dim=4096, decoder_attention_heads=16, encoder_layerdrop=0.0,
                 decoder_layerdrop=0.0, activation_function="gelu", d_model=1024, dropout=0.1,
                 attention_dropout=0.0, activation_dropout=0.0, init_std=0.02, classifier_dropout=0.0,
                 num_labels=3, pad_token_id=1, bos_token_id=0, eos_token_id=2, layer_norm_eps=1e-05,
                 num_beams=4, early_stopping=True, max_length=100, min_length=30,
                 scale_embedding=False, **kwargs):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.encoder_layers = encoder_layers
        self.encoder_ffn_dim = encoder_ffn_dim
        self.encoder_attention_heads = encoder_attention_heads
        self.decoder_layers = decoder_layers
        self.decoder_ffn_dim = decoder_ffn_dim
        self.decoder_attention_heads = decoder_attention_heads
        self.encoder_layerdrop = encoder_layerdrop
        self.decoder_layerdrop = decoder_layerdrop
        self.activation_function = activation_function
        self.d_model = d_model
        self.dropout = dropout
        self.attention_dropout = attention_dropout
        self.activation_dropout = activation_dropout
        self.init_std = init_std
        self.classifier_dropout = classifier_dropout
        self.num_labels = num_labels
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.layer_norm_eps = layer_norm_eps
        self.num_beams = num_beams
        self.early_stopping = early_stopping
        self.max_length = max_length
        self.min_length = min_length
        self.scale_embedding = scale_embedding
        # Any extra keyword arguments become attributes as-is
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

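# Usage sketch (hypothetical values): BartConfig accepts arbitrary keyword overrides, so
# BartConfig(d_model=768, num_beams=1, custom_tag="demo") both overrides declared defaults
# and attaches custom_tag as a new attribute via setattr.
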
class OpenLRMConfig:
    """Hyperparameters for an OpenLRM-style transformer."""

    def __init__(self, obj_dim=1024, hidden_dim=512, num_layers=6, num_heads=8, dropout_prob=0.1, **kwargs):
        self.obj_dim = obj_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.dropout_prob = dropout_prob
        self.all_head_size = self.num_heads * self.hidden_dim
        # Any extra keyword arguments become attributes as-is
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

class UNet2DConditionModelConfig:
    """Hyperparameters for a conditioned 2D UNet."""

    def __init__(self, sample_size=64, layers_per_block=2, block_out_channels=None,
                 downsample=None, upsample=None, cross_attention_dim=768, act_fn="silu",
                 norm_num_groups=32, num_attention_heads=8, in_channels=4, out_channels=4,
                 attention_head_dim=64, **kwargs):
        self.sample_size = sample_size
        self.layers_per_block = layers_per_block
        # None defaults avoid shared mutable default lists; fall back to the per-block defaults.
        self.block_out_channels = block_out_channels if block_out_channels is not None else [320, 640, 1280, 1280]
        self.downsample = downsample if downsample is not None else [2, 2, 2, 2]
        self.upsample = upsample if upsample is not None else [2, 2, 2, 2]
        self.cross_attention_dim = cross_attention_dim
        self.act_fn = act_fn
        self.norm_num_groups = norm_num_groups
        self.num_attention_heads = num_attention_heads
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.attention_head_dim = attention_head_dim
        # Any extra keyword arguments become attributes as-is
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

class AutoencoderKLConfig:
    """Hyperparameters for a KL-regularized autoencoder; all values are fixed unless overridden via kwargs."""

    def __init__(self, **kwargs):
        self.sample_size = 64
        self.latent_channels = 4
        self.layers_per_block = 2
        self.block_out_channels = [128, 256, 512, 512]
        self.downsample = [2, 2, 2, 2]
        self.upsample = [2, 2, 2, 2]
        self.act_fn = "silu"
        self.norm_num_groups = 32
        self.num_channels_every_n_layers = 2
        # Any extra keyword arguments become attributes as-is
        for key, value in kwargs.items():
            setattr(self, key, value)

    @classmethod
    def from_dict(cls, config_dict):
        return cls(**config_dict)

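# Minimal, illustrative smoke test; the specific values below are assumptions chosen for
# the example, not values required by the code above.
if __name__ == "__main__":
    gpt2_cfg = GPT2Config.from_dict({"n_layer": 6, "n_head": 8, "n_embd": 512})
    mbart_cfg = MBartConfig(vocab_size=250027, d_model=1024, num_layers=12,
                            num_heads=16, pad_token_id=1, eos_token_id=2)
    bart_cfg = BartConfig(num_beams=1, custom_tag="demo")  # extra kwargs become attributes
    unet_cfg = UNet2DConditionModelConfig(sample_size=32)
    print(gpt2_cfg.n_embd, mbart_cfg.encoder_ffn_dim, bart_cfg.custom_tag, unet_cfg.block_out_channels)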