mjpyeon committed
Commit 0dce87a · 1 Parent(s): bcf3441

initial commit

.gitignore ADDED
@@ -0,0 +1,174 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
LICENSE CHANGED
@@ -0,0 +1,57 @@
1
+ EXAONEPath AI Model License Agreement 1.0 - NC
2
+
3
+ This License Agreement (“Agreement”) is entered into between you (“Licensee”) and LG Management Development Institute Co., Ltd. (“Licensor”), governing the use of the EXAONEPath AI Model (“Model”). By downloading, installing, copying, or using the Model, you agree to comply with and be bound by the terms of this Agreement. If you do not agree to all the terms, you must not download, install, copy, or use the Model. This Agreement constitutes a binding legal agreement between the Licensee and Licensor.
4
+
5
+ 1. Definitions
6
+ 1.1 Model: The artificial intelligence model provided by Licensor, which includes any software, algorithms, machine learning models, or related components supplied by Licensor. This definition extends to encompass all updates, enhancements, improvements, bug fixes, patches, or other modifications that may be provided by Licensor from time to time, whether automatically or manually implemented.
7
+ 1.2 Derivatives: Any modifications, alterations, enhancements, improvements, adaptations, or derivative works of the Model created by Licensee or any third party. This includes changes made to the Model's architecture, parameters, data processing methods, or any other aspect of the Model that results in a modification of its functionality or output.
8
+ 1.3 Output: Any data, results, content, predictions, analyses, insights, or other materials generated by the Model or Derivatives, regardless of whether they are in their original form or have been further processed or modified by the Licensee. This includes, but is not limited to, textual or numerical content produced directly or indirectly through the use of the Model.
9
+ 1.4 Licensor: LG Management Development Institute Co., Ltd., the owner, developer, and provider of the EXAONEPath AI Model. The Licensor holds all rights, title, and interest in the Model and is responsible for granting licenses to use the Model under the terms specified in this Agreement.
10
+ 1.5 Licensee: The individual, organization, corporation, academic institution, government agency, or other entity using or intending to use the Model under the terms and conditions of this Agreement. The Licensee is responsible for ensuring compliance with the Agreement by all authorized users who access or utilize the Model on behalf of the Licensee.
11
+
12
+ 2. License Grant
13
+ 2.1 Grant of License: Subject to the terms and conditions outlined in this Agreement, the Licensor hereby grants the Licensee a limited, non-exclusive, non-transferable, worldwide, and revocable license to:
14
+ a. Access, download, install, and use the Model solely for research purposes. This includes evaluation, testing, academic research and experimentation.
15
+ b. Publicly disclose research results and findings derived from the use of the Model or Derivatives, including publishing papers or presentations.
16
+ c. Modify the Model and create Derivatives based on the Model, provided that such modifications and Derivatives are used exclusively for research purposes. The Licensee may conduct experiments, perform analyses, and apply custom modifications to the Model to explore its capabilities and performance under various scenarios. If the Model is modified, the modified Model must include "EXAONEPath" at the beginning of its name.
17
+ d. Distribute the Model and Derivatives in each case with a copy of this Agreement.
18
+ 2.2 Scope of License: The license granted herein does not authorize the Licensee to use the Model for any purpose not explicitly permitted under this Agreement. Any use beyond the scope of this license, including any commercial application or external distribution, is strictly prohibited unless explicitly agreed upon in writing by the Licensor.
19
+
20
+ 3. Restrictions
21
+ 3.1 Commercial Use: The Licensee is expressly prohibited from using the Model, Derivatives, or Output for any commercial purposes, including but not limited to, developing or deploying products, services, or applications that generate revenue, whether directly or indirectly. Any commercial exploitation of the Model or its derivatives requires a separate commercial license agreement with the Licensor. Furthermore, the Licensee shall not use the Model, Derivatives or Output to develop or improve other models, except for research purposes, which is explicitly permitted.
22
+ 3.2 Reverse Engineering: The Licensee shall not decompile, disassemble, reverse engineer, or attempt to derive the source code, underlying ideas, algorithms, or structure of the Model, except to the extent that such activities are expressly permitted by applicable law. Any attempt to bypass or circumvent technological protection measures applied to the Model is strictly prohibited.
23
+ 3.3 Unlawful Use: The Licensee shall not use the Model and Derivatives for any illegal, fraudulent, or unauthorized activities, nor for any purpose that violates applicable laws or regulations. This includes but is not limited to the creation, distribution, or dissemination of malicious, deceptive, or unlawful content.
24
+ 3.4 Ethical Use: The Licensee shall ensure that the Model or Derivatives is used in an ethical and responsible manner, adhering to the following guidelines:
25
+ a. The Model and Derivatives shall not be used to generate, propagate, or amplify false, misleading, or harmful information, including fake news, misinformation, or disinformation.
26
+ b. The Model and Derivatives shall not be employed to create, distribute, or promote content that is discriminatory, harassing, defamatory, abusive, or otherwise offensive to individuals or groups based on race, gender, sexual orientation, religion, nationality, or other protected characteristics.
27
+ c. The Model and Derivatives shall not infringe on the rights of others, including intellectual property rights, privacy rights, or any other rights recognized by law. The Licensee shall obtain all necessary permissions and consents before using the Model and Derivatives in a manner that may impact the rights of third parties.
28
+ d. The Model and Derivatives shall not be used in a way that causes harm, whether physical, mental, emotional, or financial, to individuals, organizations, or communities. The Licensee shall take all reasonable measures to prevent misuse or abuse of the Model and Derivatives that could result in harm or injury.
29
+
30
+ 4. Ownership
31
+ 4.1 Intellectual Property: All rights, title, and interest in and to the Model, including any modifications, Derivatives, and associated documentation, are and shall remain the exclusive property of the Licensor. The Licensee acknowledges that this Agreement does not transfer any ownership rights to the Licensee. All trademarks, service marks, and logos associated with the Model are the property of the Licensor.
32
+ 4.2 Output: All output generated by the Model from Licensee Data ("Output") shall be the sole property of the Licensee. Licensor hereby waives any claim of ownership or intellectual property rights to the Output. Licensee is solely responsible for the legality, accuracy, quality, integrity, and use of the Output.
33
+ 4.3 Attribution: In any publication or presentation of results obtained using the Model, the Licensee shall provide appropriate attribution to the Licensor, citing the Model's name and version, along with any relevant documentation or references specified by the Licensor.
34
+
35
+ 5. No Warranty
36
+ 5.1 “As-Is” Basis: The Model, Derivatives, and Output are provided on an “as-is” and “as-available” basis, without any warranties or representations of any kind, whether express, implied, or statutory. The Licensor disclaims all warranties, including but not limited to, implied warranties of merchantability, fitness for a particular purpose, accuracy, reliability, non-infringement, or any warranty arising from the course of dealing or usage of trade.
37
+ 5.2 Performance and Reliability: The Licensor does not warrant or guarantee that the Model, Derivatives or Output will meet the Licensee’s requirements, that the operation of the Model, Derivatives or Output will be uninterrupted or error-free, or that defects in the Model will be corrected. The Licensee acknowledges that the use of the Model, Derivatives or Output is at its own risk and that the Model, Derivatives or Output may contain bugs, errors, or other limitations.
38
+ 5.3 No Endorsement: The Licensor does not endorse, approve, or certify any results, conclusions, or recommendations derived from the use of the Model. The Licensee is solely responsible for evaluating the accuracy, reliability, and suitability of the Model for its intended purposes.
39
+
40
+ 6. Limitation of Liability
41
+ 6.1 No Liability for Damages: To the fullest extent permitted by applicable law, in no event shall the Licensor be liable for any special, incidental, indirect, consequential, exemplary, or punitive damages, including but not limited to, damages for loss of business profits, business interruption, loss of business information, loss of data, or any other pecuniary or non-pecuniary loss arising out of or in connection with the use or inability to use the Model, Derivatives or any Output, even if the Licensor has been advised of the possibility of such damages.
42
+ 6.2 Indemnification: The Licensee agrees to indemnify, defend, and hold harmless the Licensor, its affiliates, officers, directors, employees, and agents from and against any claims, liabilities, damages, losses, costs, or expenses (including reasonable attorneys' fees) arising out of or related to the Licensee's use of the Model, any Derivatives, or any Output, including any violation of this Agreement or applicable laws. This includes, but is not limited to, ensuring compliance with copyright laws, privacy regulations, defamation laws, and any other applicable legal or regulatory requirements.
43
+
44
+ 7. Termination
45
+ 7.1 Termination by Licensor: The Licensor reserves the right to terminate this Agreement and revoke the Licensee’s rights to use the Model at any time, with or without cause, and without prior notice if the Licensee breaches any of the terms or conditions of this Agreement. Termination shall be effective immediately upon notice.
46
+ 7.2 Effect of Termination: Upon termination of this Agreement, the Licensee must immediately cease all use of the Model, Derivatives, and Output and destroy all copies of the Model, Derivatives, and Output in its possession or control, including any backup or archival copies. The Licensee shall certify in writing to the Licensor that such destruction has been completed.
47
+ 7.3 Survival: The provisions of this Agreement that by their nature should survive termination, including but not limited to, Sections 4 (Ownership), 5 (No Warranty), 6 (Limitation of Liability), and this Section 7 (Termination), shall continue to apply after termination.
48
+
49
+ 8. Governing Law
50
+ 8.1 Governing Law: This Agreement shall be governed by and construed in accordance with the laws of the Republic of Korea, without regard to its conflict of laws principles.
51
+ 8.2 Arbitration: Any disputes, controversies, or claims arising out of or relating to this Agreement, including its existence, validity, interpretation, performance, breach, or termination, shall be referred to and finally resolved by arbitration administered by the Korean Commercial Arbitration Board (KCAB) in accordance with the International Arbitration Rules of the Korean Commercial Arbitration Board in force at the time of the commencement of the arbitration. The seat of arbitration shall be Seoul, Republic of Korea. The tribunal shall consist of one arbitrator. The language of the arbitration shall be English.
52
+
53
+ 9. Alterations
54
+ 9.1 Modifications: The Licensor reserves the right to modify or amend this Agreement at any time, in its sole discretion. Any modifications will be effective upon posting the updated Agreement on the Licensor’s website or through other means of communication. The Licensee is responsible for reviewing the Agreement periodically for changes. Continued use of the Model after any modifications have been made constitutes acceptance of the revised Agreement.
55
+ 9.2 Entire Agreement: This Agreement constitutes the entire agreement between the Licensee and Licensor concerning the subject matter hereof and supersedes all prior or contemporaneous oral or written agreements, representations, or understandings. Any terms or conditions of any purchase order or other document submitted by the Licensee in connection with the Model that are in addition to, different from, or inconsistent with the terms and conditions of this Agreement are not binding on the Licensor and are void.
56
+
57
+ By downloading, installing, or using the EXAONEPath AI Model, the Licensee acknowledges that it has read, understood, and agrees to be bound by the terms and conditions of this Agreement.
README.md CHANGED
@@ -2,4 +2,80 @@
2
  license: other
3
  license_name: exaonepath
4
  license_link: LICENSE
5
+ tags:
6
+ - lg-ai
7
+ - EXAONE-Path-2.0
8
+ - pathology
9
  ---
10
+
11
+ # EXAONE Path 2.0
12
+
13
+ ## Introduction
14
+ In digital pathology, whole-slide images (WSIs) are often difficult to handle due to their gigapixel scale, so most approaches train patch encoders via self-supervised learning (SSL) and then aggregate the patch-level embeddings via multiple instance learning (MIL) or slide encoders for downstream tasks.
15
+ However, patch-level SSL may overlook complex domain-specific features that are essential for biomarker prediction, such as mutation status and molecular characteristics, as SSL methods rely only on basic augmentations selected for natural image domains and applied to small patch-level regions.
16
+ Moreover, SSL methods remain less data efficient than fully supervised approaches, requiring extensive computational resources and datasets to achieve competitive performance.
17
+ To address these limitations, we present EXAONE Path 2.0, a pathology foundation model that learns patch-level representations under direct slide-level supervision.
18
+ Using only 35k WSIs for training, EXAONE Path 2.0 achieves state-of-the-art average performance across 10 biomarker prediction tasks, demonstrating remarkable data efficiency.
19
+
20
+ ## Quickstart
21
+ Load EXAONE Path 2.0 and run inference on a whole-slide image to extract tile-level features.
22
+
23
+ ### 1. Prerequisites ###
24
+ - NVIDIA GPU with 24GB+ VRAM
25
+ - Python 3.12+
26
+
27
+ Note: This implementation requires an NVIDIA GPU and drivers. The provided environment setup uses CUDA-enabled PyTorch, so an NVIDIA GPU is mandatory for running the model.
28
+
29
+ ### 2. Setup Python environment ###
30
+ ```bash
31
+ git clone https://github.com/LG-AI-EXAONE/EXAONE-Path-2.0.git
32
+ cd EXAONE-Path-2.0
33
+ pip install -r requirements.txt
34
+ ```
35
+
36
+ ### 3. Load the model & Inference
37
+ ```python
38
+ from exaonepath import EXAONEPathV20
39
+
40
+ hf_token = "YOUR_HUGGING_FACE_ACCESS_TOKEN"
41
+ model = EXAONEPathV20.from_pretrained("LGAI-EXAONE/EXAONE-Path-2.0", use_auth_token=hf_token)
42
+
43
+ svs_path = "YOUR_SVS_PATH"
44
+ patch_features = model(svs_path)[0]
45
+ ```
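+
+ The snippet above keeps only the first element of the returned tuple. Going by `exaonepath.py` in this commit, `forward` returns three activations (patch-level, 4096-px-region-level, and slide-level features), so a hedged sketch for inspecting all of them could look like the following; the variable names are illustrative only:
+
+ ```python
+ # Hedged sketch: unpack all three outputs returned by EXAONEPathV20.forward
+ patch_feats, region_feats, slide_feat = model(svs_path)
+ print(patch_feats.shape, region_feats.shape, slide_feat.shape)
+ ```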
46
+
47
+ ## Model Performance Comparison
48
+
49
+ | **Benchmarks** | **TITAN** | **PRISM** | **CHIEF** | **Prov-GigaPath** | **UNI2-h** | **EXAONE Path 1.0** | **EXAONE Path 2.0** |
50
+ |---|---|---|---|---|---|---|---|
51
+ | LUAD-TMB-USA1 | 0.690 | 0.645 | 0.650 | 0.674 | 0.669 | 0.692 | 0.664 |
52
+ | LUAD-EGFR-USA1 | 0.754 | 0.815 | 0.784 | 0.709 | 0.827 | 0.784 | 0.853 |
53
+ | LUAD-KRAS-USA2 | 0.541 | 0.623 | 0.468 | 0.511 | 0.469 | 0.527 | 0.645 |
54
+ | CRC-MSI-KOR | 0.937 | 0.943 | 0.927 | 0.954 | 0.981 | 0.972 | 0.938 |
55
+ | BRCA-TP53-CPTAC | 0.788 | 0.842 | 0.788 | 0.739 | 0.808 | 0.766 | 0.757 |
56
+ | BRCA-PIK3CA-CPTAC | 0.758 | 0.893 | 0.702 | 0.735 | 0.857 | 0.735 | 0.804 |
57
+ | RCC-PBRM1-CPTAC | 0.638 | 0.557 | 0.513 | 0.527 | 0.501 | 0.526 | 0.583 |
58
+ | RCC-BAP1-CPTAC | 0.719 | 0.769 | 0.731 | 0.697 | 0.716 | 0.719 | 0.807 |
59
+ | COAD-KRAS-CPTAC | 0.764 | 0.744 | 0.699 | 0.815 | 0.943 | 0.767 | 0.912 |
60
+ | COAD-TP53-CPTAC | 0.889 | 0.816 | 0.701 | 0.712 | 0.783 | 0.819 | 0.875 |
61
+ | **Average** | 0.748 | 0.765 | 0.696 | 0.707 | 0.755 | 0.731 | **0.784** |
62
+
63
+ <br>
64
+
65
+
66
+ ## License
67
+ The model is licensed under [EXAONEPath AI Model License Agreement 1.0 - NC](./LICENSE)
68
+
69
+ <!-- ## Citation
70
+ If you find EXAONE Path 2.0 useful, please cite it using this BibTeX:
71
+ ```
72
+ @article{yun2024exaonepath,
73
+ title={EXAONE Path 2.0 Technical Report},
74
+ author={Yun, Juseung and Hu, Yi and Kim, Jinhyung and Jang, Jongseong and Lee, Soonyoung},
75
+ journal={arXiv preprint arXiv:2408.00380},
76
+ year={2024}
77
+ }
78
+ ```
+ -->
79
+
80
+ ## Contact
81
+ LG AI Research Technical Support: <a href="mailto:contact_us1@lgresearch.ai">contact_us1@lgresearch.ai</a>
config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "small_tile_size": 256,
3
+ "large_tile_size": 4096
4
+ }
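These two values mirror the defaults of `EXAONEPathV20.__init__` in `exaonepath.py` (256-px small tiles grouped into 4096-px regions), and `PyTorchModelHubMixin` uses them to re-create the model on `from_pretrained`. A minimal sketch, assuming you only want to build the architecture locally without pulling from the Hub:

```python
# Minimal sketch (assumption: run from the repository root so `exaonepath` is importable).
from exaonepath import EXAONEPathV20

# Equivalent to what config.json encodes for from_pretrained (weights not loaded here).
model = EXAONEPathV20(small_tile_size=256, large_tile_size=4096)
```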
exaonepath.py ADDED
@@ -0,0 +1,165 @@
1
+ import math
2
+ import typing as t
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ from cucim import CuImage
7
+ from huggingface_hub import PyTorchModelHubMixin
8
+ from torchvision.transforms import functional as TF
9
+ from torchvision.transforms import v2 as T
10
+
11
+ from networks.vit import vit4k_base, vit_base, vit_global_base
12
+ from utils.tensor_utils import (
13
+ format_first_stg_act_as_second_stg_inp,
14
+ format_second_stg_act_as_third_stg_inp,
15
+ forward_with_batch_size_limit,
16
+ scale_and_normalize,
17
+ tile,
18
+ )
19
+ from utils.wsi_utils import load_slide_img, pack_slide, segment_tissue
20
+
21
+ if t.TYPE_CHECKING:
22
+ from _typeshed import StrPath
23
+
24
+
25
+ class PadToDivisible(T.Transform):
26
+ def __init__(self, size: int, pad_value: float | None = None):
27
+ super().__init__()
28
+ self.size = size
29
+ self.pad_value = pad_value
30
+
31
+ def transform(self, inpt, params):
32
+ assert isinstance(inpt, torch.Tensor) and inpt.ndim >= 3
33
+
34
+ H, W = inpt.shape[-2:]
35
+
36
+ pad_h = (self.size - H % self.size) % self.size
37
+ pad_w = (self.size - W % self.size) % self.size
38
+
39
+ if pad_h > 0 or pad_w > 0:
40
+ inpt = torch.nn.functional.pad(
41
+ inpt, (0, pad_w, 0, pad_h), value=self.pad_value
42
+ )
43
+
44
+ return inpt
45
+
46
+
47
+ class Preprocessing(T.Transform):
48
+ def __init__(
49
+ self, small_tile_size_with_this_mpp: int, small_tile_size_with_target_mpp: int
50
+ ):
51
+ self.small_tile_size_with_this_mpp = small_tile_size_with_this_mpp
52
+ self.small_tile_size_with_target_mpp = small_tile_size_with_target_mpp
53
+
54
+ def transform(self, inpt, params):
55
+ assert isinstance(inpt, torch.Tensor) and inpt.ndim >= 3
56
+
57
+ # Scale the input tensor to the target MPP
58
+ if self.small_tile_size_with_this_mpp != self.small_tile_size_with_target_mpp:
59
+ inpt = TF.resize(
60
+ inpt,
61
+ [
62
+ self.small_tile_size_with_target_mpp,
63
+ self.small_tile_size_with_target_mpp,
64
+ ],
65
+ )
66
+
67
+ # Normalize the input tensor
68
+ inpt = scale_and_normalize(inpt)
69
+
70
+ return inpt
71
+
72
+
73
+ class EXAONEPathV20(nn.Module, PyTorchModelHubMixin):
74
+ def __init__(
75
+ self,
76
+ small_tile_size: int = 256,
77
+ large_tile_size: int = 4096,
78
+ ):
79
+ super().__init__()
80
+
81
+ self.small_tile_size = small_tile_size
82
+ self.large_tile_size = large_tile_size
83
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
84
+
85
+ self.model_first_stg = vit_base().to(self.device).eval()
86
+ self.model_second_stg = vit4k_base().to(self.device).eval()
87
+ self.model_third_stg = vit_global_base().to(self.device).eval()
88
+
89
+ def forward(
90
+ self,
91
+ svs_path: "StrPath",
92
+ target_mpp: float = 0.5,
93
+ first_stg_batch_size: int = 128,
94
+ ):
95
+ small_tiles, is_tile_valid, padded_size, small_tile_size, large_tile_size = (
96
+ self._load_wsi(svs_path, target_mpp=target_mpp)
97
+ )
98
+ width, height = padded_size
99
+
100
+ with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
101
+ with torch.no_grad():
102
+ act1 = forward_with_batch_size_limit(
103
+ self.model_first_stg,
104
+ small_tiles,
105
+ batch_size_on_gpu=first_stg_batch_size,
106
+ preproc_fn=Preprocessing(
107
+ small_tile_size_with_this_mpp=small_tile_size,
108
+ small_tile_size_with_target_mpp=self.small_tile_size,
109
+ ),
110
+ device=self.device,
111
+ out_device=self.device,
112
+ dtype=torch.bfloat16,
113
+ )
114
+ act1 = format_first_stg_act_as_second_stg_inp(
115
+ act1,
116
+ height=height,
117
+ width=width,
118
+ small_tile_size=small_tile_size,
119
+ large_tile_size=large_tile_size,
120
+ )
121
+ act2: torch.Tensor = self.model_second_stg(act1)
122
+ act2_formatted = format_second_stg_act_as_third_stg_inp(
123
+ act2,
124
+ height=height,
125
+ width=width,
126
+ large_tile_size=large_tile_size,
127
+ )
128
+ act3: torch.Tensor = self.model_third_stg(act2_formatted)
129
+ return act1[is_tile_valid], act2, act3
130
+
131
+ def _load_wsi(self, svs_path: "StrPath", target_mpp: float):
132
+ # Load WSI tile
133
+ with CuImage(str(svs_path)) as wsi_obj:
134
+ try:
135
+ mpp = float(wsi_obj.metadata["aperio"]["MPP"])
136
+ except KeyError:
137
+ print(
138
+ f"Warning: MPP metadata not found, using default value of {target_mpp}"
139
+ )
140
+ mpp = target_mpp
141
+
142
+ img = load_slide_img(wsi_obj)
143
+ height, width = img.shape[:2]
144
+ mask_tensor = torch.from_numpy(segment_tissue(svs_path, seg_level=-1)[0])
145
+ mask_tensor = TF.resize(mask_tensor.unsqueeze(0), [height, width]).squeeze(
146
+ 0
147
+ )
148
+ x: torch.Tensor = torch.from_numpy(img).permute(2, 0, 1)
149
+
150
+ small_tile_size = math.ceil(self.small_tile_size * (target_mpp / mpp))
151
+ large_tile_size = (
152
+ self.large_tile_size // self.small_tile_size
153
+ ) * small_tile_size
154
+ pad_image = PadToDivisible(large_tile_size, 255)
155
+ pad_mask = PadToDivisible(large_tile_size, 0)
156
+
157
+ x = pad_image(x)
158
+ padded_size = (x.size(-1), x.size(-2))
159
+
160
+ x = tile(x, small_tile_size)
161
+ mask_padded = pad_mask(mask_tensor.unsqueeze(0))
162
+ mask_tile = tile(mask_padded, small_tile_size).squeeze(1)
163
+ is_tile_valid = mask_tile.sum(dim=(1, 2)) > 0
164
+
165
+ return x, is_tile_valid, padded_size, small_tile_size, large_tile_size
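In short, `_load_wsi` rescales the nominal tile sizes to the slide's own MPP before padding and tiling, and `forward` then runs the hierarchy patch ViT → region ViT → global ViT. A minimal sketch of the tile-size arithmetic above, with illustrative MPP values (not taken from any particular slide):

```python
# Hedged sketch of the tile-size arithmetic in _load_wsi (MPP values are illustrative).
import math

target_mpp, slide_mpp = 0.5, 0.25      # microns per pixel: model target vs. slide metadata
small, large = 256, 4096               # tile sizes defined at the target MPP

# Small-tile size read from the slide so that resizing it later hits the target MPP:
small_at_slide_mpp = math.ceil(small * (target_mpp / slide_mpp))   # 512
# The large tile keeps the same 16x16 grid of small tiles:
large_at_slide_mpp = (large // small) * small_at_slide_mpp         # 8192
print(small_at_slide_mpp, large_at_slide_mpp)
```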
networks/__init__.py ADDED
File without changes
networks/vit.py ADDED
@@ -0,0 +1,569 @@
1
+ import math
2
+ import warnings
3
+ from functools import partial
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+
9
+ torch.set_float32_matmul_precision("high")
10
+ torch.backends.cuda.enable_flash_sdp(True)
11
+
12
+
13
+ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
14
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
15
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
16
+ def norm_cdf(x):
17
+ # Computes standard normal cumulative distribution function
18
+ return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0
19
+
20
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
21
+ warnings.warn(
22
+ "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
23
+ "The distribution of values may be incorrect.",
24
+ stacklevel=2,
25
+ )
26
+
27
+ with torch.no_grad():
28
+ # Values are generated by using a truncated uniform distribution and
29
+ # then using the inverse CDF for the normal distribution.
30
+ # Get upper and lower cdf values
31
+ l = norm_cdf((a - mean) / std)
32
+ u = norm_cdf((b - mean) / std)
33
+
34
+ # Uniformly fill tensor with values from [l, u], then translate to
35
+ # [2l-1, 2u-1].
36
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
37
+
38
+ # Use inverse cdf transform for normal distribution to get truncated
39
+ # standard normal
40
+ tensor.erfinv_()
41
+
42
+ # Transform to proper mean, std
43
+ tensor.mul_(std * math.sqrt(2.0))
44
+ tensor.add_(mean)
45
+
46
+ # Clamp to ensure it's in the proper range
47
+ tensor.clamp_(min=a, max=b)
48
+ return tensor
49
+
50
+
51
+ def trunc_normal_(tensor, mean=0.0, std=1.0, a=-2.0, b=2.0):
52
+ # type: (torch.Tensor, float, float, float, float) -> torch.Tensor
53
+ return _no_grad_trunc_normal_(tensor, mean, std, a, b)
54
+
55
+
56
+ def drop_path(x, drop_prob: float = 0.0, training: bool = False):
57
+ if drop_prob == 0.0 or not training:
58
+ return x
59
+ keep_prob = 1 - drop_prob
60
+ shape = (x.shape[0],) + (1,) * (
61
+ x.ndim - 1
62
+ ) # work with diff dim tensors, not just 2D ConvNets
63
+ random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
64
+ random_tensor.floor_() # binarize
65
+ output = x.div(keep_prob) * random_tensor
66
+ return output
67
+
68
+
69
+ class LayerScale(nn.Module):
70
+ def __init__(
71
+ self,
72
+ dim: int,
73
+ init_values: float = 1e-5,
74
+ inplace: bool = False,
75
+ ) -> None:
76
+ super().__init__()
77
+ self.inplace = inplace
78
+ self.gamma = nn.Parameter(init_values * torch.ones(dim))
79
+
80
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
81
+ return x.mul_(self.gamma) if self.inplace else x * self.gamma
82
+
83
+
84
+ class DropPath(nn.Module):
85
+ """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
86
+
87
+ def __init__(self, drop_prob=None):
88
+ super(DropPath, self).__init__()
89
+ self.drop_prob = drop_prob
90
+
91
+ def forward(self, x):
92
+ return drop_path(x, self.drop_prob, self.training)
93
+
94
+
95
+ class Mlp(nn.Module):
96
+ def __init__(
97
+ self,
98
+ in_features,
99
+ hidden_features=None,
100
+ out_features=None,
101
+ act_layer=nn.GELU,
102
+ drop=0.0,
103
+ ):
104
+ super().__init__()
105
+ out_features = out_features or in_features
106
+ hidden_features = hidden_features or in_features
107
+ self.fc1 = nn.Linear(in_features, hidden_features)
108
+ self.act = act_layer()
109
+ self.fc2 = nn.Linear(hidden_features, out_features)
110
+ self.drop = nn.Dropout(drop)
111
+ self.drop_p = drop
112
+
113
+ def forward(self, x):
114
+ x = self.fc1(x)
115
+ x = self.act(x)
116
+ x = self.drop(x)
117
+ x = self.fc2(x)
118
+ x = self.drop(x)
119
+ return x
120
+
121
+
122
+ # TODO Use SelfAttention class in networks.modules
123
+ class Attention(nn.Module):
124
+ def __init__(
125
+ self,
126
+ dim,
127
+ num_heads=8,
128
+ qkv_bias=False,
129
+ qk_scale=None,
130
+ attn_drop=0.0,
131
+ proj_drop=0.0,
132
+ ):
133
+ super().__init__()
134
+ self.dim = dim
135
+ self.num_heads = num_heads
136
+ head_dim = dim // num_heads
137
+ self.scale = qk_scale or head_dim**-0.5
138
+
139
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
140
+ self.attn_drop = nn.Dropout(attn_drop)
141
+ self.attn_drop_p = attn_drop
142
+ self.proj = nn.Linear(dim, dim)
143
+ self.proj_drop = nn.Dropout(proj_drop)
144
+ self.proj_drop_p = proj_drop
145
+
146
+ def forward(self, x):
147
+ B, N, C = x.shape
148
+ qkv = (
149
+ self.qkv(x)
150
+ .reshape(B, N, 3, self.num_heads, C // self.num_heads)
151
+ .permute(2, 0, 3, 1, 4)
152
+ )
153
+ q, k, v = qkv[0], qkv[1], qkv[2]
154
+
155
+ x = F.scaled_dot_product_attention(
156
+ q, k, v, dropout_p=self.attn_drop.p, scale=self.scale
157
+ )
158
+ x = x.transpose(1, 2).reshape(B, N, C)
159
+
160
+ x = self.proj(x)
161
+ x = self.proj_drop(x)
162
+ return x
163
+
164
+
165
+ class Block(nn.Module):
166
+ def __init__(
167
+ self,
168
+ dim,
169
+ num_heads,
170
+ mlp_ratio=4.0,
171
+ qkv_bias=False,
172
+ qk_scale=None,
173
+ drop=0.0,
174
+ attn_drop=0.0,
175
+ init_values=None,
176
+ drop_path=0.0,
177
+ act_layer=nn.GELU,
178
+ norm_layer=nn.LayerNorm,
179
+ ):
180
+ super().__init__()
181
+ self.norm1 = norm_layer(dim)
182
+ self.attn = Attention(
183
+ dim,
184
+ num_heads=num_heads,
185
+ qkv_bias=qkv_bias,
186
+ qk_scale=qk_scale,
187
+ attn_drop=attn_drop,
188
+ proj_drop=drop,
189
+ )
190
+ self.ls1 = (
191
+ LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
192
+ )
193
+ self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
194
+ self.norm2 = norm_layer(dim)
195
+ mlp_hidden_dim = int(dim * mlp_ratio)
196
+ self.mlp = Mlp(
197
+ in_features=dim,
198
+ hidden_features=mlp_hidden_dim,
199
+ act_layer=act_layer,
200
+ drop=drop,
201
+ )
202
+ self.ls2 = (
203
+ LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
204
+ )
205
+
206
+ def forward(self, x):
207
+ x = x + self.drop_path(self.ls1(self.attn(self.norm1(x))))
208
+ x = x + self.drop_path(self.ls2(self.mlp(self.norm2(x))))
209
+ return x
210
+
211
+
212
+ class PatchEmbed(nn.Module):
213
+ """Image to Patch Embedding"""
214
+
215
+ def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
216
+ super().__init__()
217
+ num_patches = (img_size // patch_size) * (img_size // patch_size)
218
+ self.img_size = img_size
219
+ self.patch_size = patch_size
220
+ self.num_patches = num_patches
221
+
222
+ self.proj = nn.Conv2d(
223
+ in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
224
+ )
225
+
226
+ def forward(self, x):
227
+ B, C, H, W = x.shape
228
+ x = self.proj(x).flatten(2).transpose(1, 2)
229
+ return x
230
+
231
+
232
+ class Network(nn.Module):
233
+ emb_dim: int
234
+
235
+
236
+ class VisionTransformer(Network):
237
+ """Vision Transformer"""
238
+
239
+ def __init__(
240
+ self,
241
+ img_size=256,
242
+ patch_size=16,
243
+ in_chans=3,
244
+ num_classes=0,
245
+ embed_dim=768,
246
+ depth=12,
247
+ num_heads=12,
248
+ mlp_ratio=4.0,
249
+ qkv_bias=False,
250
+ qk_scale=None,
251
+ init_values=None, # for layerscale: None or 0 => no layerscale
252
+ drop_rate=0.0,
253
+ attn_drop_rate=0.0,
254
+ drop_path_rate=0.0,
255
+ norm_layer=nn.LayerNorm,
256
+ **kwargs
257
+ ):
258
+ super().__init__()
259
+ self.num_features = self.embed_dim = embed_dim
260
+
261
+ self.patch_embed = PatchEmbed(
262
+ img_size=img_size,
263
+ patch_size=patch_size,
264
+ in_chans=in_chans,
265
+ embed_dim=embed_dim,
266
+ )
267
+ num_patches = self.patch_embed.num_patches
268
+
269
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
270
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
271
+ self.pos_drop = nn.Dropout(p=drop_rate)
272
+
273
+ dpr = [
274
+ x.item() for x in torch.linspace(0, drop_path_rate, depth)
275
+ ] # stochastic depth decay rule
276
+ self.blocks = nn.ModuleList(
277
+ [
278
+ Block(
279
+ dim=embed_dim,
280
+ num_heads=num_heads,
281
+ mlp_ratio=mlp_ratio,
282
+ qkv_bias=qkv_bias,
283
+ qk_scale=qk_scale,
284
+ init_values=init_values,
285
+ drop=drop_rate,
286
+ attn_drop=attn_drop_rate,
287
+ drop_path=dpr[i],
288
+ norm_layer=norm_layer,
289
+ )
290
+ for i in range(depth)
291
+ ]
292
+ )
293
+ self.norm = norm_layer(embed_dim)
294
+
295
+ # Classifier head
296
+ self.head = (
297
+ nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
298
+ )
299
+
300
+ trunc_normal_(self.pos_embed, std=0.02)
301
+ trunc_normal_(self.cls_token, std=0.02)
302
+ self.apply(self._init_weights)
303
+
304
+ def _init_weights(self, m):
305
+ if isinstance(m, nn.Linear):
306
+ trunc_normal_(m.weight, std=0.02)
307
+ if isinstance(m, nn.Linear) and m.bias is not None:
308
+ nn.init.constant_(m.bias, 0)
309
+ elif isinstance(m, nn.LayerNorm):
310
+ nn.init.constant_(m.bias, 0)
311
+ nn.init.constant_(m.weight, 1.0)
312
+
313
+ def interpolate_pos_encoding(self, x, w, h):
314
+ npatch = x.shape[1] - 1
315
+ N = self.pos_embed.shape[1] - 1
316
+ if npatch == N and w == h:
317
+ return self.pos_embed
318
+ class_pos_embed = self.pos_embed[:, 0]
319
+ patch_pos_embed = self.pos_embed[:, 1:]
320
+ dim = x.shape[-1]
321
+ w0 = w // self.patch_embed.patch_size
322
+ h0 = h // self.patch_embed.patch_size
323
+ # we add a small number to avoid floating point error in the interpolation
324
+ # see discussion at https://github.com/facebookresearch/dino/issues/8
325
+ w0, h0 = w0 + 0.1, h0 + 0.1
326
+ patch_pos_embed = nn.functional.interpolate(
327
+ patch_pos_embed.reshape(
328
+ 1, int(math.sqrt(N)), int(math.sqrt(N)), dim
329
+ ).permute(0, 3, 1, 2),
330
+ scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)),
331
+ mode="bicubic",
332
+ )
333
+ assert (
334
+ int(w0) == patch_pos_embed.shape[-2]
335
+ and int(h0) == patch_pos_embed.shape[-1]
336
+ )
337
+ patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
338
+ return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)
339
+
340
+ def prepare_tokens(self, x):
341
+ B, nc, w, h = x.shape
342
+ x = self.patch_embed(x) # patch linear embedding
343
+
344
+ # add the [CLS] token to the embed patch tokens
345
+ cls_tokens = self.cls_token.expand(B, -1, -1)
346
+ x = torch.cat((cls_tokens, x), dim=1) # (B, S + 1, C)
347
+
348
+ # add positional encoding to each token
349
+ x = x + self.interpolate_pos_encoding(x, w, h)
350
+ return self.pos_drop(x)
351
+
352
+ def forward(self, x):
353
+ x = self.prepare_tokens(x)
354
+ for blk in self.blocks:
355
+ x = blk(x)
356
+ x = self.norm(x)
357
+ return x[:, 0]
358
+
359
+ def get_last_selfattention(self, x):
360
+ x = self.prepare_tokens(x)
361
+ for i, blk in enumerate(self.blocks):
362
+ if i < len(self.blocks) - 1:
363
+ x = blk(x)
364
+ else:
365
+ # return attention of the last block
366
+ return blk(x, return_attention=True)
367
+
368
+ def get_intermediate_layers(self, x, n=1):
369
+ x = self.prepare_tokens(x)
370
+ # we return the output tokens from the `n` last blocks
371
+ output = []
372
+ for i, blk in enumerate(self.blocks):
373
+ x = blk(x)
374
+ if len(self.blocks) - i <= n:
375
+ output.append(self.norm(x))
376
+ return output
377
+
378
+
379
+ class VisionTransformer4K(Network):
380
+ """Vision Transformer 4K"""
381
+
382
+ def __init__(
383
+ self,
384
+ num_classes=0,
385
+ img_size=256,
386
+ input_embed_dim=384,
387
+ output_embed_dim=192,
388
+ depth=12,
389
+ num_heads=12,
390
+ mlp_ratio=4.0,
391
+ qkv_bias=False,
392
+ qk_scale=None,
393
+ init_values=None, # for layerscale: None or 0 => no layerscale
394
+ drop_rate=0.0,
395
+ attn_drop_rate=0.0,
396
+ drop_path_rate=0.0,
397
+ norm_layer=nn.LayerNorm,
398
+ num_prototypes=64,
399
+ **kwargs
400
+ ):
401
+ super().__init__()
402
+ embed_dim = output_embed_dim
403
+ self.num_features = self.embed_dim = embed_dim
404
+ self.phi = nn.Sequential(
405
+ *[
406
+ nn.Linear(input_embed_dim, output_embed_dim),
407
+ nn.GELU(),
408
+ nn.Dropout(p=drop_rate),
409
+ ]
410
+ )
411
+ num_patches = int(img_size // 16) ** 2
412
+
413
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
414
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
415
+ self.pos_drop = nn.Dropout(p=drop_rate)
416
+
417
+ dpr = [
418
+ x.item() for x in torch.linspace(0, drop_path_rate, depth)
419
+ ] # stochastic depth decay rule
420
+ self.blocks = nn.ModuleList(
421
+ [
422
+ Block(
423
+ dim=embed_dim,
424
+ num_heads=num_heads,
425
+ mlp_ratio=mlp_ratio,
426
+ qkv_bias=qkv_bias,
427
+ qk_scale=qk_scale,
428
+ init_values=init_values,
429
+ drop=drop_rate,
430
+ attn_drop=attn_drop_rate,
431
+ drop_path=dpr[i],
432
+ norm_layer=norm_layer,
433
+ )
434
+ for i in range(depth)
435
+ ]
436
+ )
437
+ self.norm = norm_layer(embed_dim)
438
+
439
+ # Classifier head
440
+ self.head = (
441
+ nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
442
+ )
443
+
444
+ trunc_normal_(self.pos_embed, std=0.02)
445
+ trunc_normal_(self.cls_token, std=0.02)
446
+ self.apply(self._init_weights)
447
+
448
+ def _init_weights(self, m):
449
+ if isinstance(m, nn.Linear):
450
+ trunc_normal_(m.weight, std=0.02)
451
+ if isinstance(m, nn.Linear) and m.bias is not None:
452
+ nn.init.constant_(m.bias, 0)
453
+ elif isinstance(m, nn.LayerNorm):
454
+ nn.init.constant_(m.bias, 0)
455
+ nn.init.constant_(m.weight, 1.0)
456
+
457
+ def interpolate_pos_encoding(self, x, w, h):
458
+ npatch = x.shape[1] - 1
459
+ N = self.pos_embed.shape[1] - 1
460
+ if npatch == N and w == h:
461
+ return self.pos_embed
462
+ class_pos_embed = self.pos_embed[:, 0]
463
+ patch_pos_embed = self.pos_embed[:, 1:]
464
+ dim = x.shape[-1]
465
+ w0 = w // 1
466
+ h0 = h // 1
467
+ # we add a small number to avoid floating point error in the interpolation
468
+ # see discussion at https://github.com/facebookresearch/dino/issues/8
469
+ w0, h0 = w0 + 0.1, h0 + 0.1
470
+ patch_pos_embed = nn.functional.interpolate(
471
+ patch_pos_embed.reshape(
472
+ 1, int(math.sqrt(N)), int(math.sqrt(N)), dim
473
+ ).permute(0, 3, 1, 2),
474
+ scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)),
475
+ mode="bicubic",
476
+ )
477
+ assert (
478
+ int(w0) == patch_pos_embed.shape[-2]
479
+ and int(h0) == patch_pos_embed.shape[-1]
480
+ )
481
+ patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
482
+ return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1)
483
+
484
+ def prepare_tokens(self, x):
485
+ # print('preparing tokens (after crop)', x.shape)
486
+ self.mpp_feature = x
487
+ B, embed_dim, w, h = x.shape
488
+ x = x.flatten(2, 3).transpose(1, 2)
489
+
490
+ x = self.phi(x)
491
+
492
+ # add the [CLS] token to the embed patch tokens
493
+ cls_tokens = self.cls_token.expand(B, -1, -1)
494
+ x = torch.cat((cls_tokens, x), dim=1)
495
+
496
+ # add positional encoding to each token
497
+ x = x + self.interpolate_pos_encoding(x, w, h)
498
+
499
+ return self.pos_drop(x)
500
+
501
+ def forward(self, x):
502
+ x = self.prepare_tokens(x)
503
+ for blk in self.blocks:
504
+ x = blk(x)
505
+ x = self.norm(x)
506
+ return x[:, 0]
507
+
508
+ def get_last_selfattention(self, x):
509
+ x = self.prepare_tokens(x)
510
+ for i, blk in enumerate(self.blocks):
511
+ if i < len(self.blocks) - 1:
512
+ x = blk(x)
513
+ else:
514
+ # return attention of the last block
515
+ return blk(x, return_attention=True)
516
+
517
+ def get_intermediate_layers(self, x, n=1):
518
+ x = self.prepare_tokens(x)
519
+ # we return the output tokens from the `n` last blocks
520
+ output = []
521
+ for i, blk in enumerate(self.blocks):
522
+ x = blk(x)
523
+ if len(self.blocks) - i <= n:
524
+ output.append(self.norm(x))
525
+ return output
526
+
527
+
528
+ def vit_base(patch_size=16, **kwargs):
529
+ model = VisionTransformer(
530
+ patch_size=patch_size,
531
+ embed_dim=768,
532
+ depth=12,
533
+ num_heads=12,
534
+ mlp_ratio=4,
535
+ qkv_bias=True,
536
+ norm_layer=partial(nn.LayerNorm, eps=1e-6),
537
+ **kwargs
538
+ )
539
+ return model
540
+
541
+
542
+ def vit4k_base(patch_size=16, **kwargs):
543
+ model = VisionTransformer4K(
544
+ patch_size=patch_size,
545
+ input_embed_dim=768,
546
+ output_embed_dim=768,
547
+ depth=6,
548
+ num_heads=12,
549
+ mlp_ratio=4,
550
+ qkv_bias=True,
551
+ norm_layer=partial(nn.LayerNorm, eps=1e-6),
552
+ **kwargs
553
+ )
554
+ return model
555
+
556
+
557
+ def vit_global_base(patch_size=16, **kwargs):
558
+ model = VisionTransformer4K(
559
+ patch_size=patch_size,
560
+ input_embed_dim=768,
561
+ output_embed_dim=768,
562
+ depth=2,
563
+ num_heads=6,
564
+ mlp_ratio=4,
565
+ qkv_bias=True,
566
+ norm_layer=partial(nn.LayerNorm, eps=1e-6),
567
+ **kwargs
568
+ )
569
+ return model
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17cc4f62887c3dc97380f0b824278cc16f75638f46657f3487204a0633462373
3
+ size 576596452
requirements.txt ADDED
@@ -0,0 +1,16 @@
1
+ --extra-index-url https://download.pytorch.org/whl/cu124
2
+
3
+ torch==2.6.0+cu124
4
+ torchvision==0.21.0+cu124
5
+
6
+ # configs
7
+ pydantic==2.10.3
8
+
9
+ # data
10
+ openslide-bin==4.0.0.6
11
+ openslide-python==1.4.1
12
+ cucim-cu12==25.2
13
+ rectangle_packer==2.0.2
14
+ opencv-python-headless==4.11.0.86
15
+
16
+ huggingface_hub
utils/__init__.py ADDED
File without changes
utils/tensor_utils.py ADDED
@@ -0,0 +1,318 @@
1
+ import typing as t
2
+
3
+ import torch
4
+ import torchvision.transforms.functional as TF
5
+
6
+
7
+ def tile(x: torch.Tensor, size: int, pad_value: int | float | None = None):
8
+ C, H, W = x.shape[-3:]
9
+
10
+ pad_h = (size - H % size) % size
11
+ pad_w = (size - W % size) % size
12
+ if pad_h > 0 or pad_w > 0:
13
+ x = torch.nn.functional.pad(x, (0, pad_w, 0, pad_h), value=pad_value)
14
+
15
+ nh, nw = x.size(-2) // size, x.size(-1) // size
16
+ return (
17
+ x.view(-1, C, nh, size, nw, size)
18
+ .permute(0, 2, 4, 1, 3, 5)
19
+ .reshape(-1, C, size, size)
20
+ )
21
+
22
+
23
+ def small_tiles_to_large_tiles(
24
+ small_tiles: torch.Tensor,
25
+ width: int,
26
+ large_tile_size: int,
27
+ sampled_large_tiles_idx: list | torch.Tensor | None = None,
28
+ ) -> torch.Tensor:
29
+
30
+ has_channel = small_tiles.ndim == 4
31
+ small_tile_size = small_tiles.size(-1)
32
+ num_small_tiles = small_tiles.size(0)
33
+
34
+ nw = width // small_tile_size
35
+ nh = num_small_tiles // nw
36
+
37
+ r = large_tile_size // small_tile_size
38
+
39
+ num_large_tiles = (nh // r) * (nw // r)
40
+ large_tile_indices = (
41
+ range(num_large_tiles)
42
+ if sampled_large_tiles_idx is None
43
+ else sampled_large_tiles_idx
44
+ )
45
+
46
+ tiles = []
47
+ for k in large_tile_indices:
48
+ start_row = (k // (nw // r)) * r
49
+ start_col = (k % (nw // r)) * r
50
+ for i in range(start_row, start_row + r):
51
+ for j in range(start_col, start_col + r):
52
+ tiles.append(small_tiles[i * nw + j])
53
+
54
+ stacked = torch.stack(tiles, dim=0).view(-1, r, r, *small_tiles.shape[1:])
55
+ if has_channel:
56
+ large_tiles = stacked.permute(0, 3, 1, 4, 2, 5).reshape(
57
+ -1, small_tiles.size(1), large_tile_size, large_tile_size
58
+ )
59
+ else:
60
+ large_tiles = stacked.permute(0, 1, 3, 2, 4).reshape(
61
+ -1, large_tile_size, large_tile_size
62
+ )
63
+ return large_tiles
64
+
65
+
66
+ def small_tile_flags_to_large_tile_flags(
67
+ small_tile_flags: torch.Tensor,
68
+ width: int,
69
+ small_tile_size: int,
70
+ large_tile_size: int,
71
+ aggregation: t.Literal["any", "all"] = "any",
72
+ ):
73
+ small_tile_flags = small_tile_flags.view(-1, 1, 1)
74
+ num_small_tiles = small_tile_flags.size(0)
75
+ nw = width // small_tile_size
76
+ r = large_tile_size // small_tile_size
77
+ num_large_tiles = num_small_tiles // r**2
78
+ large_tile_flags = small_tiles_to_large_tiles(
79
+ small_tile_flags,
80
+ width=nw,
81
+ large_tile_size=r,
82
+ ).view(num_large_tiles, -1)
83
+ return (
84
+ large_tile_flags.any(-1) if aggregation == "any" else large_tile_flags.all(-1)
85
+ )
86
+
87
+
88
+ def format_first_stg_act_as_second_stg_inp(
89
+ x: torch.Tensor,
90
+ height: int,
91
+ width: int,
92
+ small_tile_size: int,
93
+ large_tile_size: int,
94
+ ):
95
+ assert height % small_tile_size == 0 and width % small_tile_size == 0
96
+ D = x.size(1)
97
+ nh, nw = height // small_tile_size, width // small_tile_size
98
+ r = large_tile_size // small_tile_size
99
+ x = x.view(-1, nh, nw, D)
100
+ x = x.permute(0, 3, 1, 2).reshape(-1, D, nh // r, r, nw // r, r)
101
+ x = x.permute(0, 2, 4, 1, 3, 5).reshape(-1, D, r, r)
102
+ return x
103
+
104
+
105
+ def format_second_stg_inp_as_first_stg_act(
106
+ x: torch.Tensor, height: int, width: int, small_tile_size: int, large_tile_size: int
107
+ ):
108
+ D = x.size(1)
109
+ nh, nw = height // small_tile_size, width // small_tile_size
110
+ r = large_tile_size // small_tile_size
111
+ x = x.view(-1, nh // r, nw // r, D, r, r)
112
+ x = x.permute(0, 3, 1, 4, 2, 5).reshape(-1, D, nh, nw)
113
+ x = x.permute(0, 2, 3, 1).reshape(-1, D)
114
+ return x
115
+
116
+
117
+ def format_second_stg_act_as_third_stg_inp(
118
+ x: torch.Tensor,
119
+ height: int,
120
+ width: int,
121
+ large_tile_size: int,
122
+ ):
123
+ D = x.size(1)
124
+ nh = height // large_tile_size
125
+ nw = width // large_tile_size
126
+ return x.view(-1, nh, nw, D).permute(0, 3, 1, 2).contiguous()
127
+
128
+
129
+ def forward_with_batch_size_limit(
130
+ net,
131
+ x: torch.Tensor,
132
+ batch_size_on_gpu: int,
133
+ device: str | torch.device,
134
+ out_device: str | torch.device,
135
+ preproc_fn: t.Callable[[torch.Tensor], torch.Tensor] | None = None,
136
+ dtype: torch.dtype = torch.float32,
137
+ ):
138
+ features = list()
139
+ for start_idx in range(0, x.size(0), batch_size_on_gpu):
140
+ end_idx = min(x.size(0), start_idx + batch_size_on_gpu)
141
+ batch = x[start_idx:end_idx].to(device=device, non_blocking=True)
142
+ batch = preproc_fn(batch) if preproc_fn else batch
143
+ batch = batch.to(dtype=dtype, non_blocking=True)
144
+ actual_bs = end_idx - start_idx
145
+ batch = pad_to_batch(batch, batch_size_on_gpu)
146
+ batch: torch.Tensor = forward_compiled(net, batch)
147
+ # batch = net(batch)
148
+ features.append(batch[:actual_bs].to(device=out_device, non_blocking=True))
149
+ if torch.device(out_device).type == "cpu" and torch.device(device).type == "cuda":
150
+ torch.cuda.synchronize()
151
+ return torch.cat(features)
152
+
153
+
154
+ @t.overload
155
+ def backward_with_batch_size_limit(
156
+ net,
157
+ x: torch.Tensor,
158
+ grad: torch.Tensor,
159
+ batch_size_on_gpu: int,
160
+ device: str | torch.device,
161
+ out_device: str | torch.device,
162
+ dtype: torch.dtype,
163
+ ret_grad: t.Literal[True],
164
+ ) -> torch.Tensor: ...
165
+
166
+
167
+ @t.overload
168
+ def backward_with_batch_size_limit(
169
+ net,
170
+ x: torch.Tensor,
171
+ grad: torch.Tensor,
172
+ batch_size_on_gpu: int,
173
+ device: str | torch.device,
174
+ out_device: str | torch.device,
175
+ dtype: torch.dtype,
176
+ ret_grad: t.Literal[False],
177
+ ) -> None: ...
178
+
179
+
180
+ def backward_with_batch_size_limit(
181
+ net,
182
+ x: torch.Tensor,
183
+ grad: torch.Tensor,
184
+ batch_size_on_gpu: int,
185
+ device: str | torch.device,
186
+ out_device: str | torch.device,
187
+ dtype: torch.dtype,
188
+ ret_grad: bool,
189
+ ):
190
+ assert x.size(0) == grad.size(0)
191
+
192
+ grads = []
193
+ total = x.size(0)
194
+ for start in range(0, total, batch_size_on_gpu):
195
+ end = min(total, start + batch_size_on_gpu)
196
+ actual_bs = end - start
197
+
198
+ batch = x[start:end].to(device=device, dtype=dtype, non_blocking=True)
199
+ batch = pad_to_batch(batch, batch_size_on_gpu)
200
+ if ret_grad:
201
+ batch.requires_grad_(True)
202
+
203
+ with torch.autocast(device_type="cuda", dtype=dtype):
204
+ out = net(batch)
205
+ # out = forward_compiled(net, batch)
206
+
207
+ grad_batch = grad[start:end].to(device=device, dtype=dtype, non_blocking=True)
208
+ grad_batch = pad_to_batch(grad_batch, batch_size_on_gpu)
209
+
210
+ with torch._dynamo.utils.maybe_enable_compiled_autograd(
211
+ True, fullgraph=True, dynamic=False
212
+ ):
213
+ out.backward(grad_batch)
214
+ # out.backward(grad_batch)
215
+
216
+ if ret_grad:
217
+ assert batch.grad is not None
218
+ grads.append(batch.grad[:actual_bs].to(out_device, non_blocking=True))
219
+
220
+ if ret_grad:
221
+ if (
222
+ torch.device(out_device).type == "cpu"
223
+ and torch.device(device).type == "cuda"
224
+ ):
225
+ torch.cuda.synchronize()
226
+ return torch.cat(grads)
227
+
228
+
229
+ @torch.compile(fullgraph=True, dynamic=False)
230
+ def forward_compiled(net, x: torch.Tensor) -> torch.Tensor:
231
+ return net(x)
232
+
233
+
234
+ def pad_to_batch(t: torch.Tensor, batch_size: int) -> torch.Tensor:
235
+ assert (
236
+ t.size(0) <= batch_size
237
+ ), f"'{t.shape}' size tensor cannot be padded to be batch size of '{batch_size}'"
238
+ pad = batch_size - t.size(0)
239
+ return torch.cat([t, t.new_zeros((pad,) + t.shape[1:])], dim=0) if pad > 0 else t
240
+
241
+
242
+ def scale_and_normalize(x: torch.Tensor, inplace: bool = False):
243
+ x = x.clamp_(0, 255) if inplace else x.clamp(0, 255)
244
+ x = TF.normalize(
245
+ x / 255, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=inplace
246
+ )
247
+ return x
248
+
249
+
250
+ def combine_tile_list(tile_list: list[torch.Tensor], ncols: int):
251
+ """
252
+ Combines a flat list of tile tensors (each with shape (C, H, W)) into one output tensor,
253
+ arranging them in a grid with the specified number of columns. The tiles in the final row
254
+ or column may have different sizes.
255
+
256
+ Args:
257
+ tile_list (list of torch.Tensor): A flat list of tile tensors, each with shape
258
+ (channels, tile_height, tile_width). It is assumed
259
+ that the number of channels is consistent across all tiles.
260
+ ncols (int): Number of columns to arrange the tiles in.
261
+
262
+ Returns:
263
+ torch.Tensor: A tensor of shape (channels, total_height, total_width), where:
264
+ - total_height is the sum of maximum tile heights in each row.
265
+ - total_width is the sum of maximum tile widths in each column.
266
+ """
267
+ if not tile_list:
268
+ raise ValueError("tile_list is empty")
269
+
270
+ ntiles = len(tile_list)
271
+ nrows = (ntiles + ncols - 1) // ncols # Ceiling division to get the number of rows
272
+
273
+ # Convert the flat tile list into a nested list (rows of tiles)
274
+ nested_tiles = [tile_list[i * ncols : (i + 1) * ncols] for i in range(nrows)]
275
+
276
+ # Compute the maximum tile height for each row
277
+ row_heights = [max(tile.shape[1] for tile in row) for row in nested_tiles]
278
+
279
+ # Compute the maximum tile width for each column (consider only rows that have a tile in that column)
280
+ col_widths = []
281
+ for col in range(ncols):
282
+ max_width = 0
283
+ for row in nested_tiles:
284
+ if col < len(row):
285
+ tile_w = row[col].shape[2]
286
+ if tile_w > max_width:
287
+ max_width = tile_w
288
+ col_widths.append(max_width)
289
+
290
+ # Calculate the total output dimensions
291
+ total_height = sum(row_heights)
292
+ total_width = sum(col_widths)
293
+
294
+ # Determine the number of channels from the first tile
295
+ channels = tile_list[0].shape[0]
296
+
297
+ # Preallocate the output tensor (this avoids repeated concatenation and extra memory copies)
298
+ out_tensor = torch.zeros(
299
+ channels,
300
+ total_height,
301
+ total_width,
302
+ dtype=tile_list[0].dtype,
303
+ device=tile_list[0].device,
304
+ )
305
+
306
+ # Place each tile in its proper location by calculating offsets
307
+ y_offset = 0
308
+ for i, row in enumerate(nested_tiles):
309
+ x_offset = 0
310
+ for j, tile in enumerate(row):
311
+ tile_h, tile_w = tile.shape[1], tile.shape[2]
312
+ out_tensor[
313
+ :, y_offset : y_offset + tile_h, x_offset : x_offset + tile_w
314
+ ] = tile
315
+ x_offset += col_widths[j]
316
+ y_offset += row_heights[i]
317
+
318
+ return out_tensor
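A small sanity check for `combine_tile_list`, with four tiles of mixed sizes arranged in two columns (tile sizes are arbitrary):

    import torch

    tiles = [
        torch.ones(3, 4, 5), torch.ones(3, 4, 3),   # first row: height 4, widths 5 and 3
        torch.ones(3, 2, 5), torch.ones(3, 2, 3),   # second row: height 2
    ]
    canvas = combine_tile_list(tiles, ncols=2)
    assert canvas.shape == (3, 4 + 2, 5 + 3)        # (C, sum of row heights, sum of column widths)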
utils/wsi_utils.py ADDED
@@ -0,0 +1,514 @@
1
+ import typing as t
2
+ from concurrent.futures import ThreadPoolExecutor
3
+ from pathlib import Path
5
+
6
+ import cv2
7
+ import numpy as np
8
+ import rpack
9
+ from openslide import OpenSlide
10
+ from PIL import Image
11
+ from scipy.ndimage import binary_fill_holes
12
+ from skimage import filters
13
+ from skimage.morphology import remove_small_objects
14
+
15
+ if t.TYPE_CHECKING:
16
+ from _typeshed import StrPath
17
+
18
+ try:
19
+ from skimage import img_as_ubyte # type: ignore
20
+ except ImportError:
21
+ from skimage.util import img_as_ubyte # type: ignore
22
+
23
+
24
+ def find_contours(arr: np.ndarray, only_outer: bool = True, convex: bool = False):
25
+ """Find contours in a binary image
26
+
27
+ Parameters
28
+ ----------
29
+ arr : np.ndarray
30
+ Binary image
31
+ only_outer : bool
32
+ If True, only find external contours
33
+ convex : bool
34
+ If True, return convex hull of contours
35
+
36
+ Returns
37
+ -------
38
+ contours : list
39
+ List of contours
40
+ """
41
+ mode = cv2.RETR_EXTERNAL if only_outer else cv2.RETR_LIST
42
+ cresults = cv2.findContours(arr.astype(np.uint8), mode, cv2.CHAIN_APPROX_SIMPLE)
43
+
44
+ contours = cresults[1] if len(cresults) == 3 else cresults[0]
45
+ contours = list(contours) if isinstance(contours, tuple) else contours
46
+
47
+ if convex:
48
+ contours = [cv2.convexHull(cnt) for cnt in contours]
49
+ return contours
50
+
51
+
52
+ def merge_overlapping_bboxes(bboxes: list):
53
+ """Merge overlapping bounding boxes
54
+
55
+ Parameters
56
+ ----------
57
+ bboxes : list
58
+ List of bounding boxes in format (x, y, width, height).
+ The list is modified in place; overlapping boxes are merged into their union.
60
+ """
60
+ candidate_count = 0
61
+ while candidate_count < len(bboxes):
62
+ candidate_count += 1
63
+ overlap = False
64
+ candidate_box = bboxes.pop(0)
65
+ for index, compare_box in enumerate(bboxes):
66
+ overlapping, new_bbox = merge_if_overlapping(candidate_box, compare_box)
67
+ if overlapping:
68
+ overlap = True
69
+ candidate_count = 0
70
+ bboxes.pop(index)
71
+ bboxes.append(new_bbox)
72
+ break
73
+ if not overlap:
74
+ bboxes.append(candidate_box)
75
+
76
+
77
+ def merge_if_overlapping(a: tuple, b: tuple):
78
+ """Check if two bounding boxes overlap and merge them if they do
79
+
80
+ Parameters
81
+ ----------
82
+ a : tuple
83
+ First bounding box in format (x, y, width, height)
84
+ b : tuple
85
+ Second bounding box in format (x, y, width, height)
86
+
87
+ Returns
88
+ -------
89
+ overlapping : bool
90
+ True if boxes overlap
91
+ new_bbox : tuple
92
+ Merged bounding box if overlapping, empty list otherwise
93
+ """
94
+ bottom = np.max([a[0], b[0]])
95
+ top = np.min([a[0] + a[2], b[0] + b[2]])
96
+ left = np.max([a[1], b[1]])
97
+ right = np.min([a[1] + a[3], b[1] + b[3]])
98
+
99
+ do_intersect = bottom < top and left < right
100
+
101
+ if do_intersect:
102
+ x_min = np.min([a[1], b[1]])
103
+ y_min = np.min([a[0], b[0]])
104
+ x_max = np.max([a[1] + a[3], b[1] + b[3]])
105
+ y_max = np.max([a[0] + a[2], b[0] + b[2]])
106
+ new_bbox = (y_min, x_min, y_max - y_min, x_max - x_min)
107
+ return True, new_bbox
108
+
109
+ return False, []
110
+
111
+
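A quick numeric check of the overlap/merge helpers above, with boxes in (x, y, width, height) form (values chosen for illustration):

    a = (0, 0, 10, 10)
    b = (5, 5, 10, 10)
    overlapping, merged = merge_if_overlapping(a, b)
    assert overlapping and tuple(merged) == (0, 0, 15, 15)

    boxes = [a, b, (100, 100, 5, 5)]
    merge_overlapping_bboxes(boxes)    # in place: the two overlapping boxes collapse into one
    assert len(boxes) == 2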
112
+
113
+ def load_slide_img(
114
+ wsi,
115
+ level: int = 0,
116
+ ) -> np.ndarray:
117
+ """Load slide image with specific level
118
+
119
+ Parameters
120
+ ----------
121
+ wsi : CuImage
122
+ The CuImage object
123
+ level : int
124
+ Slide level to load
125
+
126
+ Returns
127
+ -------
128
+ slide_img : np.ndarray
129
+ Numpy array with RGB channels
130
+ """
131
+ slide_img = np.asarray(wsi.read_region(level=level, device="gpu", num_workers=32))
132
+ if slide_img.shape[2] == 4:
133
+ slide_img = slide_img[:, :, :-1]
134
+ return slide_img
135
+
136
+
137
+ def rgb2gray(img):
138
+ """Convert RGB image to grayscale
139
+
140
+ Parameters
141
+ ----------
142
+ img : np.ndarray
143
+ RGB image with 3 channels
144
+
145
+ Returns
146
+ -------
147
+ gray : np.ndarray
148
+ Grayscale image
149
+ """
150
+ return np.dot(img, [0.299, 0.587, 0.114])
151
+
152
+
153
+ def thresh_slide(gray, thresh_val, sigma=13):
154
+ """Threshold gray image to binary image
155
+
156
+ Parameters
157
+ ----------
158
+ gray : np.ndarray
159
+ 2D grayscale image
160
+ thresh_val : float
161
+ Thresholding value
162
+ sigma : int
163
+ Gaussian smoothing sigma
164
+
165
+ Returns
166
+ -------
167
+ bw_img : np.ndarray
168
+ Binary image
169
+ """
170
+ smooth = filters.gaussian(gray, sigma=sigma)
171
+ smooth /= np.amax(smooth)
172
+ bw_img = smooth < thresh_val
173
+ return bw_img
174
+
175
+
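The two helpers above are typically chained: convert a low-resolution slide image to grayscale, then threshold the smoothed result into a tissue/background mask. A hedged sketch with a random stand-in image and an illustrative threshold of 0.8:

    import numpy as np

    thumb = np.random.randint(0, 256, (512, 512, 3)).astype(np.float64)   # stand-in thumbnail
    gray = rgb2gray(thumb)
    tissue_mask = thresh_slide(gray, thresh_val=0.8, sigma=13)            # True where darker than background
    print(tissue_mask.shape, tissue_mask.dtype)                           # (512, 512) bool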
176
+
177
+ def get_tissue_bboxes(
178
+ mask: np.ndarray, wsi_width: int, wsi_height: int, min_tissue_size: int = 10000
179
+ ):
180
+ scale = wsi_height / mask.shape[0]
181
+
182
+ contours = find_contours(mask)
183
+ areas = []
184
+ for cnt in contours:
185
+ area = cv2.contourArea(cnt)
186
+ areas.append(area)
187
+
188
+ large_contours = []
189
+ large_areas = []
190
+ for i, cnt in enumerate(contours):
191
+ area_mm = areas[i]
192
+ if area_mm >= min_tissue_size:
193
+ large_contours.append(cnt)
194
+ large_areas.append(area_mm)
195
+
196
+ areas = large_areas
197
+
198
+ boxes = [cv2.boundingRect(c) for c in large_contours]
199
+
200
+ return boxes if boxes else [[0, 0, wsi_width, wsi_height]]
205
+
206
+
207
+ def get_tissue_positions_and_packed_size(
208
+ boxes,
209
+ wsi_width: int,
210
+ wsi_height: int,
211
+ scale: float,
212
+ ) -> tuple[list[tuple[int, int]], tuple[int, int]]:
213
+ if len(boxes) > 1:
214
+ merge_overlapping_bboxes(boxes)
215
+ boxes = np.array(boxes, dtype=np.float32) * scale
216
+ if len(boxes.shape) == 1:
217
+ boxes = boxes[None]
218
+ boxes[:, :2] = np.floor(boxes[:, :2])
219
+ boxes[:, 0] = np.clip(boxes[:, 0], 0, wsi_width - 1)
220
+ boxes[:, 1] = np.clip(boxes[:, 1], 0, wsi_height - 1)
221
+ boxes[:, 2:] = np.ceil(boxes[:, 2:])
222
+ boxes[:, 2] = np.clip(boxes[:, 2], 0, wsi_width - boxes[:, 0])
223
+ boxes[:, 3] = np.clip(boxes[:, 3], 0, wsi_height - boxes[:, 1])
224
+ boxes = boxes.astype(np.int32)
225
+
226
+ box_sizes = [(int(box[2]), int(box[3])) for box in boxes]
227
+ positions = rpack.pack(box_sizes) # at processing spacing
228
+ packed_size: tuple[int, int] = rpack.bbox_size(
229
+ box_sizes, positions
230
+ ) # width, height
231
+
232
+ counter = 0
233
+ for sdf in np.arange(0.5, 0.96, 0.05):
234
+ # asymmetry_factor = min(packed_size)/max(packed_size)
235
+ # if asymmetry_factor < sdf:
236
+ rparams = {
237
+ "max_height": int(max(packed_size) * sdf),
238
+ "max_width": int(max(packed_size) * sdf),
239
+ }
240
+ try:
241
+ positions = rpack.pack(box_sizes, **rparams) # at processing spacing
242
+ packed_size: tuple[int, int] = rpack.bbox_size(box_sizes, positions)
243
+ break
244
+ except rpack.PackingImpossibleError as ex:
245
+ counter += 1
246
+
247
+ return positions, (int(packed_size[0]), int(packed_size[1]))
248
+
249
+
250
+ def pack_slide(
251
+ wsi_arr: np.ndarray,
252
+ mask: np.ndarray,
253
+ min_tissue_size: int = 10000,
254
+ ):
255
+ H, W = wsi_arr.shape[:2]
256
+ boxes = get_tissue_bboxes(mask, W, H, min_tissue_size=min_tissue_size)
257
+ if len(boxes) > 0:
258
+ positions, packed_size = get_tissue_positions_and_packed_size(
259
+ boxes, W, H, H / mask.shape[0]
260
+ )
261
+ img_out = np.full(
262
+ (packed_size[1], packed_size[0]) + wsi_arr.shape[2:],
263
+ 255,
264
+ dtype=wsi_arr.dtype,
265
+ )
266
+ mask_out = np.zeros((packed_size[1], packed_size[0]), dtype=bool)
267
+ for i, pos in enumerate(positions):
268
+ box = boxes[i]
269
+ img_out[pos[1] : pos[1] + box[3], pos[0] : pos[0] + box[2]] = wsi_arr[
270
+ box[1] : box[1] + box[3], box[0] : box[0] + box[2]
271
+ ]
272
+ mask_out[pos[1] : pos[1] + box[3], pos[0] : pos[0] + box[2]] = mask[
273
+ box[1] : box[1] + box[3], box[0] : box[0] + box[2]
274
+ ]
275
+ else:
276
+ img_out = wsi_arr
277
+ mask_out = mask
278
+
279
+ return img_out, mask_out
280
+
281
+
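`pack_slide` ties the pieces together: tissue bounding boxes are extracted from the low-resolution mask, merged and scaled to the image resolution, re-packed with rpack, and copied onto a smaller white canvas. A synthetic end-to-end sketch (the blob positions and `min_tissue_size` value are illustrative, not from this repo):

    import numpy as np

    # White image with two dark tissue blobs far apart, plus a 10x-downsampled mask.
    wsi_arr = np.full((4000, 6000, 3), 255, dtype=np.uint8)
    wsi_arr[500:1500, 500:1500] = 100
    wsi_arr[2500:3500, 4500:5500] = 100
    mask = np.zeros((400, 600), dtype=bool)
    mask[50:150, 50:150] = True
    mask[250:350, 450:550] = True

    packed_img, packed_mask = pack_slide(wsi_arr, mask, min_tissue_size=100)
    print(wsi_arr.shape, "->", packed_img.shape)   # the packed canvas is much smaller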
282
+ def get_level_downsamples(wsi: OpenSlide):
283
+ level_downsamples = []
284
+ dim_0 = wsi.level_dimensions[0]
285
+
286
+ for downsample, dim in zip(wsi.level_downsamples, wsi.level_dimensions):
287
+ estimated_downsample = (dim_0[0] / float(dim[0]), dim_0[1] / float(dim[1]))
288
+ level_downsamples.append(
289
+ estimated_downsample
290
+ if estimated_downsample != (downsample, downsample)
291
+ else (downsample, downsample)
292
+ )
293
+
294
+ return level_downsamples
295
+
296
+
297
+ def segment_tissue(
298
+ wsi_path: Path,
299
+ seg_level=-1,
300
+ sthresh=8,
301
+ sthresh_up=255,
302
+ mthresh=7,
303
+ close=4,
304
+ filter_params={"a_t": 1, "a_h": 1, "max_n_holes": 100},
305
+ ref_patch_size=512,
306
+ ):
307
+ """
308
+ Segment the tissue via HSV -> Median thresholding -> Binary threshold
309
+ """
310
+
311
+ def _filter_contours(contours, hierarchy, filter_params):
312
+ """
313
+ Filter contours by: area.
314
+ """
315
+ filtered = []
316
+
317
+ # find indices of foreground contours (parent == -1)
318
+ hierarchy_1 = np.flatnonzero(hierarchy[:, 1] == -1)
319
+ all_holes = []
320
+
321
+ # loop through foreground contour indices
322
+ for cont_idx in hierarchy_1:
323
+ # actual contour
324
+ cont = contours[cont_idx]
325
+ # indices of holes contained in this contour (children of parent contour)
326
+ holes = np.flatnonzero(hierarchy[:, 1] == cont_idx)
327
+ # take contour area (includes holes)
328
+ a = cv2.contourArea(cont)
329
+ # calculate the contour area of each hole
330
+ hole_areas = [cv2.contourArea(contours[hole_idx]) for hole_idx in holes]
331
+ # actual area of foreground contour region
332
+ a = a - np.array(hole_areas).sum()
333
+ if a == 0:
334
+ continue
335
+ if a > filter_params["a_t"]:
336
+ filtered.append(cont_idx)
337
+ all_holes.append(holes)
338
+
339
+ foreground_contours = [contours[cont_idx] for cont_idx in filtered]
340
+
341
+ hole_contours = []
342
+
343
+ for hole_ids in all_holes:
344
+ unfiltered_holes = [contours[idx] for idx in hole_ids]
345
+ unfiltered_holes = sorted(
346
+ unfiltered_holes, key=cv2.contourArea, reverse=True
347
+ )
348
+ # take max_n_holes largest holes by area
349
+ unfiltered_holes = unfiltered_holes[: filter_params["max_n_holes"]]
350
+ filtered_holes = []
351
+
352
+ # filter these holes
353
+ for hole in unfiltered_holes:
354
+ if cv2.contourArea(hole) > filter_params["a_h"]:
355
+ filtered_holes.append(hole)
356
+
357
+ hole_contours.append(filtered_holes)
358
+
359
+ return foreground_contours, hole_contours
360
+
361
+ def draw_white_bands(img: np.ndarray, thickness: int):
362
+ height, width = img.shape[:2]
363
+ white = [255, 255, 255]  # white fill color
364
+
365
+ # Paint a filled white band of the given thickness along each image border
366
+ # Top band
367
+ cv2.rectangle(img, (0, 0), (width, thickness), white, -1)
368
+
369
+ # Bottom band
370
+ cv2.rectangle(img, (0, height - thickness), (width, height), white, -1)
371
+
372
+ # Left band
373
+ cv2.rectangle(img, (0, 0), (thickness, height), white, -1)
374
+
375
+ # Right band
376
+ cv2.rectangle(img, (width - thickness, 0), (width, height), white, -1)
377
+
378
+ with OpenSlide(str(wsi_path)) as wsi:
379
+ if seg_level < 0:
380
+ seg_level = wsi.get_best_level_for_downsample(64)
381
+
382
+ img = np.asarray(
383
+ wsi.read_region(
384
+ location=(0, 0), level=seg_level, size=wsi.level_dimensions[seg_level]
385
+ )
386
+ )
387
+
388
+ img_rgb = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
389
+ draw_white_bands(img_rgb, thickness=20)
390
+ img_gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
391
+
392
+ H, W = img_rgb.shape[:2]
393
+
394
+ R_8, G_8, B_8 = cv2.split(img_rgb)  # img_rgb is in RGB channel order
395
+ R = R_8.astype(np.int32)
396
+ G = G_8.astype(np.int32)
397
+ B = B_8.astype(np.int32)
398
+
399
+ mask = (R >= 0) & (R <= 110) & (G >= 0) & (G <= 110) & (B >= 0) & (B <= 110)
400
+
401
+ color_difference1 = np.abs((R) - (G)) <= 15
402
+ color_difference2 = np.abs((G) - (B)) <= 15
403
+ color_difference3 = np.abs((R) - (B)) <= 15
404
+ color_difference = color_difference1 & color_difference2 & color_difference3
405
+
406
+ final_mask = mask & color_difference
407
+
408
+ laplacian = cv2.Laplacian(img_gray, cv2.CV_64F)
409
+ laplacian_abs = cv2.convertScaleAbs(laplacian)
410
+ mask = laplacian_abs <= 15
411
+ img_rgb[mask] = [255, 255, 255]
412
+
413
+ img_hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV) # Convert to HSV space
414
+ img_med = cv2.medianBlur(
415
+ img_hsv[:, :, 1], mthresh
416
+ ) # Apply median blurring #same to median filter
417
+
418
+ # Thresholding
419
+ _, img_thresh = cv2.threshold(img_med, sthresh, sthresh_up, cv2.THRESH_BINARY)
420
+ # Morphological closing
421
+ if close > 0:
422
+ kernel = np.ones((close, close), np.uint8)
423
+ img_thresh = cv2.morphologyEx(img_thresh, cv2.MORPH_CLOSE, kernel)
424
+
425
+ # before k-medicon
426
+ scale = get_level_downsamples(wsi)[seg_level]
427
+ scaled_ref_patch_area = int(ref_patch_size**2 / (scale[0] * scale[1]))
428
+ filter_params = filter_params.copy()
429
+ filter_params["a_t"] = filter_params["a_t"] * scaled_ref_patch_area
430
+ filter_params["a_h"] = filter_params["a_h"] * scaled_ref_patch_area
431
+
432
+ # Find and filter contours
433
+ contours, hierarchy = cv2.findContours(
434
+ img_thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE
435
+ )
436
+
437
+ hierarchy = np.squeeze(hierarchy, axis=(0,))[:, 2:]
438
+ foreground_contours, hole_contours = _filter_contours(
439
+ contours, hierarchy, filter_params
440
+ ) # Necessary for filtering out artifacts
441
+
442
+ mask = np.zeros(img_rgb.shape[:2], dtype=np.uint8)
443
+ for i, cont in enumerate(foreground_contours):
444
+ if cont is None or len(cont) == 0:
445
+ print(f"Warning: Empty contour at index {i}")
446
+ continue
447
+
448
+ if (
449
+ cont[:, :, 0].max() >= W
450
+ or cont[:, :, 1].max() >= H
451
+ or cont[:, :, 0].min() < 0
452
+ or cont[:, :, 1].min() < 0
453
+ ):
454
+ print(f"Warning: Contour {i} coordinates out of bounds!")
455
+ continue
456
+
457
+ # Fill the main tissue contour
458
+ cv2.fillPoly(mask, [cont], 255) # type: ignore
459
+
460
+ # Remove holes if they exist
461
+ if i < len(hole_contours) and hole_contours[i]:
462
+ for hole in hole_contours[i]: # type: ignore
463
+ cv2.fillPoly(mask, [hole], 0) # type: ignore
464
+ mask = mask.astype(bool)
465
+ if not mask.any():
466
+ mask[:, :] = True # If no mask, return full mask
467
+
468
+ return mask, img_rgb
469
+
470
+
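A minimal usage sketch for `segment_tissue` with default parameters (the slide path is a placeholder):

    from pathlib import Path

    mask, thumb_rgb = segment_tissue(Path("/data/slides/example.svs"))
    print(mask.shape, mask.dtype, mask.mean())   # boolean tissue mask and its foreground fraction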
471
+ def get_mask_path_by_wsi_path(wsi_path: Path, wsi_dir: Path, mask_dir: Path) -> Path:
472
+ wsi_path, wsi_dir, mask_dir = (
473
+ wsi_path.absolute(),
474
+ wsi_dir.absolute(),
475
+ mask_dir.absolute(),
476
+ )
477
+ rel_path = wsi_path.relative_to(wsi_dir)
478
+ stitch_path_prefix = mask_dir / rel_path
479
+ stitch_path_prefix = stitch_path_prefix.parent / rel_path.stem
480
+ extensions = ["jpg", "jpeg", "png", "webp"]
481
+ extensions += [ext.upper() for ext in extensions]
482
+ stitch_paths = [
483
+ stitch_path_prefix.parent / (rel_path.stem + f".{ext}") for ext in extensions
484
+ ]
485
+ stitch_paths += [
486
+ stitch_path_prefix.parent / rel_path.stem / (rel_path.stem + f".{ext}")
487
+ for ext in extensions
488
+ ]
489
+ ret = None
490
+ for stitch_path in stitch_paths:
491
+ if stitch_path.exists():
492
+ ret = stitch_path
493
+ if ret is None:
494
+ raise FileNotFoundError(
495
+ f"No mask for wsi '{wsi_path}' in mask dir '{mask_dir}' (candidates: {', '.join([str(p) for p in stitch_paths])})"
496
+ )
497
+ return ret
498
+
499
+
500
+ def read_mask(mask_path: Path) -> np.ndarray:
501
+ img = Image.open(mask_path)
502
+ w, h = img.size
503
+ return np.asarray(img).reshape((h, w, -1)).max(-1) > 0
504
+
505
+
506
+ def read_mask_by_wsi_path(wsi_path: Path, wsi_dir: Path, mask_dir: Path) -> np.ndarray:
507
+ wsi_path, wsi_dir, mask_dir = (
508
+ wsi_path.absolute(),
509
+ wsi_dir.absolute(),
510
+ mask_dir.absolute(),
511
+ )
512
+ mask_path = get_mask_path_by_wsi_path(wsi_path, wsi_dir, mask_dir)
513
+ return read_mask(mask_path)
514
+
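The mask-lookup helpers mirror the WSI directory layout under `mask_dir` and try several extensions (jpg/jpeg/png/webp, both cases, optionally nested in a per-slide subdirectory). A hedged usage sketch with placeholder paths:

    from pathlib import Path

    wsi_dir = Path("/data/wsi")
    mask_dir = Path("/data/masks")
    wsi_path = wsi_dir / "case_001" / "slide_01.svs"

    # Resolves e.g. /data/masks/case_001/slide_01.png (or another supported extension)
    # and returns it as a boolean array of shape (H, W).
    tissue_mask = read_mask_by_wsi_path(wsi_path, wsi_dir, mask_dir)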