# Dataset settings: shared token / padding / worker values plus several named
# dataset entries, each giving a dataset type, filter flags and train/val CSV
# paths. Nesting was reconstructed from the alphabetical key order of the dump.
dataset:
  N: 512
  P: 512
  S: 512
  cellxgene:
    ds_type: h5ad
    filter: false
    num_datasets: 1139
    train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_cellxgene_train.csv
    val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_cellxgene_val.csv
  cellxgene-tahoe:
    ds_type: filtered_h5ad
    filter: true
    filter_by_species: null
    num_datasets: 1139
    train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_tahoe_cellxgene_train_filtered.csv
    val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_tahoe_cellxgene_val_filtered.csv
  chrom_token_right_idx: 2
  cls_token_idx: 3
  # Names one of the dataset entries in this section; presumably the one the
  # run actually loads -- confirm against the data loader.
  current: scbasecamp-cellxgene-tahoe-filtered
  name: vci
  num_cells: 36238464
  num_train_workers: 32
  num_val_workers: 8
  overrides:
    rpe1_top5000_variable: /large_storage/ctc/datasets/vci/validation/rpe1_top5000_variable.h5ad
  pad_length: 2048
  pad_token_idx: 0
  scbasecamp-cellxgene-tahoe:
    ds_type: filtered_h5ad
    filter: true
    filter_by_species: null
    num_datasets: 15700
    train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_train.csv
    val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_val.csv
  scbasecamp-cellxgene-tahoe-filtered:
    ds_type: filtered_h5ad
    filter: true
    filter_by_species: null
    num_datasets: 14420
    train: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_train_filtered.csv
    val: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_val_filtered.csv
  seed: 42
# Embedding settings: named entries, each pointing at an embedding file, a
# dataset-to-embedding mapping file and an optional valid-gene mask file.
embeddings:
  # Names one of the entries in this section; presumably the active one --
  # confirm against the consumer.
  current: esm2-cellxgene-basecamp-tahoe
  esm2-cellxgene:
    all_embeddings: /large_storage/ctc/userspace/aadduri/data/auxillary/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt
    ds_emb_mapping: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_cellxgene_ds_mapping.torch
    num: 19790
    size: 5120
    valid_genes_masks: null
  esm2-cellxgene-basecamp-tahoe:
    all_embeddings: /large_storage/ctc/userspace/aadduri/data/auxillary/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt
    ds_emb_mapping: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_ds_mapping.torch
    num: 19790
    size: 5120
    valid_genes_masks: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_valid_masks.torch
  # NOTE(review): this entry has the same ds_emb_mapping and valid_genes_masks
  # paths as esm2-cellxgene-basecamp-tahoe -- confirm that is intentional.
  esm2-cellxgene-tahoe:
    all_embeddings: /large_storage/ctc/userspace/aadduri/data/auxillary/Homo_sapiens.GRCh38.gene_symbol_to_embedding_ESM2.pt
    ds_emb_mapping: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_ds_mapping.torch
    num: 19790
    size: 5120
    valid_genes_masks: /large_storage/ctc/userspace/aadduri/data/auxillary/esm_basecount_tahoe_cellxgene_valid_masks.torch
# Experiment / run settings: checkpointing, node and GPU counts, profiler
# options and validation cadence.
experiment:
  checkpoint:
    every_n_train_steps: 1000
    monitor: trainer/train_loss
    path: /data/checkpoints
    save_top_k: 4
  compiled: false
  ddp_timeout: 3600
  deaware: false
  limit_val_batches: 100
  local: local
  name: vci_1.5.0_600M_basecount_tahoe_cxg
  num_epochs: 16
  num_gpus_per_node: 8
  num_nodes: 2
  port: 12400
  profile:
    enable_profiler: false
    max_steps: 110
    profile_steps:
      - 10
      - 100
  val_check_interval: 1000
# Loss settings.
loss:
  apply_normalization: false
  kernel: energy
  name: tabular
  uniformity: false
# Model hyperparameters (sizes, layer/head counts, dropout, EMA and masking
# switches).
model:
  batch_size: 48
  batch_tabular_loss: false
  counts: true
  d_hid: 2048
  dataset_correction: true
  dropout: 0.1
  ema: false
  ema_decay: 0.999
  ema_update_interval: 1000
  emsize: 2048
  name: vci
  nhead: 16
  nlayers: 16
  num_downsample: 1
  output_dim: 2048
  rda: true
  sample_rda: false
  use_flash_attention: true
  variable_masking: true
# Optimizer settings.
# NOTE(review): `start` / `end` look like learning-rate schedule factors --
# confirm against the scheduler code.
optimizer:
  end: 1.0
  gradient_accumulation_steps: 8
  max_grad_norm: 0.8
  max_lr: 1.0e-05
  reset_lr_on_restart: false
  start: 0.01
  weight_decay: 0.01
  zclip: true
# Task settings.
task:
  mask: 0.2
# Tokenizer settings.
tokenizer:
  token_dim: 5120
# Validation tasks; both are switched off via `enable: false`.
validations:
  diff_exp:
    # NOTE(review): this path differs from the rpe1_top5000_variable path
    # listed under dataset.overrides (.../vci/validation/...) -- confirm which
    # location is correct.
    dataset: /large_storage/ctc/datasets/cellxgene/processed/rpe1_top5000_variable.h5ad
    dataset_name: rpe1_top5000_variable
    enable: false
    eval_interval_multiple: 10
    method: null
    obs_filter_label: non-targeting
    obs_pert_col: gene
    top_k_rank: 200
  perturbation:
    ctrl_label: non-targeting
    dataset: /large_storage/ctc/datasets/vci/validation/replogle_perturbation.h5ad
    dataset_name: replogle_perturbation
    enable: false
    eval_interval_multiple: 10
    pert_col: gene
# Weights & Biases logging settings.
wandb:
  enable: true
  project: vci
|