Yanisadel committed
Commit f15dbe4 · verified · Parent: a393106

Upload sCT

Files changed (2):
  1. config.json +1 -1
  2. sct.py +65 -3
config.json CHANGED
@@ -6,7 +6,7 @@
   "attention_heads": 16,
   "attention_maps_to_save": [],
   "auto_map": {
-    "AutoConfig": "config.sCTConfig",
+    "AutoConfig": "sct.sCTConfig",
     "AutoModel": "sct.sCT"
   },
   "cell_len": 19968,
sct.py CHANGED
@@ -6,9 +6,7 @@ import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F  # noqa: N812
-from transformers import PreTrainedModel
-
-from projects.biobrain_p3.sCellTransformer.models.config import sCTConfig
+from transformers import PretrainedConfig, PreTrainedModel
 
 
 class GeLU(nn.Module):
@@ -551,6 +549,70 @@ class LMHead(nn.Module):
         return out
 
 
+@dataclass
+class sCTConfig(PretrainedConfig):  # noqa: N801
+    model_type = "sCT"
+
+    def __init__(self, **kwargs):  # type: ignore
+        self.alphabet_size = kwargs.get("alphabet_size", 7)
+        self.pad_token_id = kwargs.get("pad_token_id", 5)
+        self.mask_token_id = kwargs.get("mask_token_id", 6)
+        self.cell_len = kwargs.get("cell_len", 19968)
+
+        self.num_downsamples = kwargs.get("num_downsamples", 8)
+        self.attention_heads = kwargs.get("attention_heads", 16)
+        self.key_size = kwargs.get("key_size", None)
+        self.token_embed_dim = kwargs.get("token_embed_dim", 16)
+
+        self.embed_dim = kwargs.get("embed_dim", 1024)
+        self.ffn_embed_dim = kwargs.get("ffn_embed_dim", 2048)
+        self.num_layers = kwargs.get("num_layers", 4)
+        self.layer_norm_eps = kwargs.get("layer_norm_eps", 1e-5)
+        self.interpolation_method = kwargs.get("interpolation_method", "nearest")
+
+        # bad hack to satisfy cellnt_celltype_annotation.py:312
+        self.max_positions: int = kwargs.get("max_positions", 20480)
+        self.num_cells: int = kwargs.get("num_cells", 50)
+        self.num_hidden_layers_head: int = kwargs.get("num_hidden_layers_head", 1)
+
+        self.use_skip_connection: bool = kwargs.get("use_skip_connection", True)
+
+        # logging
+        self.use_gradient_checkpointing: bool = False
+
+        # return
+        self.embeddings_layers_to_save: Tuple[int, ...] = kwargs.get(
+            "embeddings_layers_to_save", ()
+        )
+        self.attention_maps_to_save: list[tuple[int, int]] = kwargs.get(
+            "attention_maps_to_save", []
+        )
+
+        # Spatial info configuration
+        self.use_spatial_information: bool = kwargs.get(
+            "use_spatial_information", False
+        )
+        self.num_scales: int = kwargs.get("num_scales", 10)
+        self.sigma_min: float = kwargs.get("sigma_min", 1.0)
+        self.sigma_max: float = kwargs.get("sigma_max", 10.0)
+
+        super().__init__(**kwargs)
+
+    def __post_init__(self) -> None:  # type: ignore # noqa: N807
+        """
+        Checks that the given values are compatible.
+        """
+        if self.key_size is None:
+            if self.embed_dim % self.attention_heads != 0:
+                raise ValueError(
+                    f"When no key size is provided, the embedding dimension "
+                    f"should be divisible by the number of heads, however "
+                    f"provided embedding dimension is {self.embed_dim} and "
+                    f"the number of heads is {self.attention_heads}."
+                )
+            self.key_size = self.embed_dim // self.attention_heads
+
+
 class sCT(PreTrainedModel):  # noqa: N801
     config_class = sCTConfig
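
For reference, a short instantiation sketch of the new config, using only names from the diff above. Because sCTConfig defines its own __init__, @dataclass does not generate one, so __post_init__ is never invoked automatically; the sketch calls it explicitly to derive key_size:

from sct import sCTConfig  # assumes sct.py is importable

cfg = sCTConfig(embed_dim=1024, attention_heads=16)

# The hand-written __init__ replaces the dataclass-generated one,
# so the key_size check must be triggered by hand:
cfg.__post_init__()
assert cfg.key_size == 1024 // 16  # key_size defaults to embed_dim / heads = 64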