Improve masking (#42)

ludvb · Apr 12, 2022 · 042e9a9 · 042e9a9
1 parent 0844a7c
commit 042e9a9
Show file tree

Hide file tree

Showing 13 changed files with 319 additions and 46 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -14,6 +14,7 @@ repos:
     rev: 19.10b0
     hooks:
       - id: black
+        additional_dependencies: ['click==8.0.4']
 
   - repo: local
     hooks:

diff --git a/.pylintrc b/.pylintrc
@@ -140,7 +140,8 @@ disable=print-statement,
         exception-escape,
         comprehension-escape,
         bad-continuation,  # https://github.com/PyCQA/pylint/issues/289
-        missing-module-docstring
+        missing-module-docstring,
+        duplicate-code
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option

diff --git a/README.org b/README.org
@@ -74,6 +74,16 @@ xfuse convert st --counts section4.tsv --image section4.jpg --transformation-mat
 It may be worthwhile to try out different values for the ~--scale~ argument, which downsamples the image data by the given factor.
 Essentially, a higher scale increases the resolution of the model but requires considerably more compute power.
 
+*** Verifying tissue masks
+
+It is usually a good idea to verify that the computed tissue masks look good.
+This can be done using the script ~./scripts/visualize_tissue_masks.py~ included in this repository:
+#+BEGIN_SRC sh
+curl -LO https://raw.githubusercontent.com/ludvb/xfuse/master/scripts/visualize_tissue_masks.py
+python visualize_tissue_masks.py */data.h5
+#+END_SRC
+The script shows each tissue image with the detected background dimmed to a quarter of its brightness. If tissue detection fails, a custom mask can be passed to ~xfuse convert~ using the ~--mask-file~ argument (see ~xfuse convert visium --help~ for more information).
+
 ** Configuring and starting the run
 
 Settings for the run are specified in a configuration file.

diff --git a/scripts/visualize_tissue_masks.py b/scripts/visualize_tissue_masks.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+"""Visually check the tissue masks stored in converted data files.
+
+Each file is drawn in its own panel of a roughly square grid. Pixels whose
+``label`` is 1 (described in the README as the detected background) are
+dimmed to a quarter of their brightness.
+"""
+
+import argparse
+
+import h5py
+import matplotlib.pyplot as plt
+import numpy as np
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument("data_files", nargs="+")
+options = parser.parse_args()
+
+# Lay the panels out on a grid that is as close to square as possible.
+n = len(options.data_files)
+c = int(np.ceil(n ** 0.5))
+r = int(np.ceil(n / c))
+# squeeze=False always returns a 2-D array of axes, even for a single panel.
+fig, axs = plt.subplots(r, c, squeeze=False)
+axs = axs.flatten()
+for filename, ax in zip(options.data_files, axs):
+    with h5py.File(filename, "r") as data:
+        # NOTE(review): presumably "image" is stored in [-1, 1]; this maps it
+        # to [0, 1] for imshow -- confirm against the converter.
+        img = (data["image"][()] + 1) / 2
+        mask = data["label"][()] == 1
+        img[mask] = 0.25 * img[mask]
+        ax.imshow(img)
+        ax.set_title(filename, fontsize="small")  # tell the panels apart
+for ax in axs[n:]:
+    ax.set_axis_off()  # the grid may have more cells than files
+plt.tight_layout()
+plt.show()
diff --git a/tests/data/files/st/mask.png b/tests/data/files/st/mask.png
diff --git a/tests/data/files/visium/mask.png b/tests/data/files/visium/mask.png
diff --git a/tests/test_functional.py b/tests/test_functional.py
@@ -3,7 +3,9 @@
 import os
 from glob import glob
 
+import h5py
 import pytest
+from imageio import imread
 
 from xfuse.__main__ import construct_default_config_toml
 from xfuse.session import Session, Unset, get
@@ -259,6 +261,31 @@ def test_convert_image(extra_args, shared_datadir, script_runner, tmp_path):
     assert os.path.exists(tmp_path / "data.h5")
 
 
+def test_convert_image_with_mask(shared_datadir, script_runner, tmp_path):
+    r"""Test convert image data with custom mask"""
+
+    mask_file = shared_datadir / "files" / "st" / "mask.png"
+
+    ret = script_runner.run(
+        "xfuse",
+        "convert",
+        "image",
+        "--image=" + str(shared_datadir / "files" / "st" / "image.jpg"),
+        "--mask",
+        "--mask-file=" + str(mask_file),
+        "--no-rotate",
+        "--save-path=" + str(tmp_path),
+    )
+
+    assert ret.success
+
+    mask_original = imread(mask_file)  # NOTE(review): presumably 0/1-valued
+    with h5py.File(tmp_path / "data.h5") as data:  # converted output
+        mask_final = data["label"][()] != 1  # type: ignore
+    # label != 1 pixel count vs. mask sum: allow a gap of <5% of all pixels
+    assert abs(mask_final.sum() - mask_original.sum()) / mask_final.size < 0.05
+
+
 @pytest.mark.parametrize("extra_args", [[], ["--no-mask", "--scale=0.5"]])
 def test_convert_st(extra_args, shared_datadir, script_runner, tmp_path):
     r"""Test convert Spatial Transcriptomics Pipeline run"""
@@ -277,6 +304,33 @@ def test_convert_st(extra_args, shared_datadir, script_runner, tmp_path):
     assert os.path.exists(tmp_path / "data.h5")
 
 
+def test_convert_st_with_mask(shared_datadir, script_runner, tmp_path):
+    r"""Test convert Spatial Transcriptomics Pipeline run with custom mask"""
+
+    mask_file = shared_datadir / "files" / "st" / "mask.png"
+
+    ret = script_runner.run(
+        "xfuse",
+        "convert",
+        "st",
+        "--counts=" + str(shared_datadir / "files" / "st" / "counts.tsv"),
+        "--image=" + str(shared_datadir / "files" / "st" / "image.jpg"),
+        "--spots=" + str(shared_datadir / "files" / "st" / "spots.tsv"),
+        "--mask",
+        "--mask-file=" + str(mask_file),
+        "--no-rotate",
+        "--save-path=" + str(tmp_path),
+    )
+
+    assert ret.success
+
+    mask_original = imread(mask_file)  # NOTE(review): presumably 0/1-valued
+    with h5py.File(tmp_path / "data.h5") as data:  # converted output
+        mask_final = data["label"][()] != 1  # type: ignore
+    # label != 1 pixel count vs. mask sum: allow a gap of <5% of all pixels
+    assert abs(mask_final.sum() - mask_original.sum()) / mask_final.size < 0.05
+
+
 @pytest.mark.parametrize("extra_args", [[], ["--no-mask", "--scale=0.5"]])
 def test_convert_visium(extra_args, shared_datadir, script_runner, tmp_path):
     r"""Test convert Space Ranger run"""
@@ -296,3 +350,33 @@ def test_convert_visium(extra_args, shared_datadir, script_runner, tmp_path):
     )
     assert ret.success
     assert os.path.exists(tmp_path / "data.h5")
+
+
+def test_convert_visium_with_mask(shared_datadir, script_runner, tmp_path):
+    r"""Test convert Space Ranger run with custom mask"""
+
+    mask_file = shared_datadir / "files" / "visium" / "mask.png"
+
+    ret = script_runner.run(
+        "xfuse",
+        "convert",
+        "visium",
+        "--image=" + str(shared_datadir / "files" / "visium" / "image.jpg"),
+        "--bc-matrix=" + str(shared_datadir / "files" / "visium" / "data.h5"),
+        "--tissue-positions="
+        + str(shared_datadir / "files" / "visium" / "tissue_positions.csv"),
+        "--scale-factors="
+        + str(shared_datadir / "files" / "visium" / "scale_factors.json"),
+        "--mask",
+        "--mask-file=" + str(mask_file),
+        "--no-rotate",
+        "--save-path=" + str(tmp_path),
+    )
+
+    assert ret.success
+
+    mask_original = imread(mask_file)  # NOTE(review): presumably 0/1-valued
+    with h5py.File(tmp_path / "data.h5") as data:  # converted output
+        mask_final = data["label"][()] != 1  # type: ignore
+    # label != 1 pixel count vs. mask sum: allow a gap of <5% of all pixels
+    assert abs(mask_final.sum() - mask_original.sum()) / mask_final.size < 0.05
diff --git a/xfuse/__main__.py b/xfuse/__main__.py
@@ -10,6 +10,7 @@
 from functools import wraps
 
 import click
+import cv2 as cv
 import h5py
 import numpy as np
 import pandas as pd
@@ -101,7 +102,29 @@ def _convert():
 @click.option("--scale-factors", type=click.File("rb"), required=True)
 @click.option("--scale", type=float)
 @click.option("--mask/--no-mask", default=True)
-@click.option("--rotate/--no-rotate", default=False)
+@click.option(
+    "--mask-file",
+    type=click.File("rb"),
+    help=" ".join(
+        [
+            "Custom mask.",
+            "Should be a single-channel image with the same size as the image.",
+            "Uses the following encoding: {}.".format(
+                ", ".join(
+                    sorted(
+                        [
+                            f"{cv.GC_BGD}=background",
+                            f"{cv.GC_PR_BGD}=likely background",
+                            f"{cv.GC_FGD}=foreground",
+                            f"{cv.GC_PR_FGD}=likely foreground",
+                        ]
+                    )
+                )
+            ),
+        ]
+    ),
+)
+@click.option("--rotate/--no-rotate", default=True)
 @_init
 def _convert_visium(
     image,
@@ -111,12 +134,15 @@ def _convert_visium(
     scale_factors,
     scale,
     mask,
+    mask_file,
     rotate,
 ):
     r"""Converts 10X Visium data"""
     tissue_positions = pd.read_csv(tissue_positions, index_col=0, header=None)
-    tissue_positions = tissue_positions[[4, 5]]
-    tissue_positions = tissue_positions.rename(columns={4: "y", 5: "x"})
+    tissue_positions = tissue_positions[[1, 4, 5]]
+    tissue_positions = tissue_positions.rename(
+        columns={1: "in_tissue", 4: "y", 5: "x"}
+    )
 
     scale_factors = json.load(scale_factors)
     spot_radius = scale_factors["spot_diameter_fullres"] / 2
@@ -130,6 +156,12 @@ def _convert_visium(
                 k: annotation_file[k][()] for k in annotation_file.keys()
             }
 
+    if mask_file:
+        with temp_attr(Image, "MAX_IMAGE_PIXELS", None):
+            custom_mask = imread(mask_file)
+    else:
+        custom_mask = None
+
     with h5py.File(bc_matrix, "r") as data:
         convert.visium.run(
             image_data,
@@ -140,6 +172,7 @@ def _convert_visium(
             annotation=annotation,
             scale_factor=scale,
             mask=mask,
+            custom_mask=custom_mask,
             rotate=rotate,
         )
 
@@ -155,7 +188,29 @@ def _convert_visium(
 @click.option("--annotation", type=click.File("rb"))
 @click.option("--scale", type=float)
 @click.option("--mask/--no-mask", default=True)
-@click.option("--rotate/--no-rotate", default=False)
+@click.option(
+    "--mask-file",
+    type=click.File("rb"),
+    help=" ".join(
+        [
+            "Custom mask.",
+            "Should be a single-channel image with the same size as the image.",
+            "Uses the following encoding: {}.".format(
+                ", ".join(
+                    sorted(
+                        [
+                            f"{cv.GC_BGD}=background",
+                            f"{cv.GC_PR_BGD}=likely background",
+                            f"{cv.GC_FGD}=foreground",
+                            f"{cv.GC_PR_FGD}=likely foreground",
+                        ]
+                    )
+                )
+            ),
+        ]
+    ),
+)
+@click.option("--rotate/--no-rotate", default=True)
 @_init
 def _convert_st(
     counts,
@@ -165,6 +220,7 @@ def _convert_st(
     annotation,
     scale,
     mask,
+    mask_file,
     rotate,
 ):
     r"""Converts Spatial Transcriptomics ("ST") data"""
@@ -196,6 +252,12 @@ def _convert_st(
                 k: annotation_file[k][()] for k in annotation_file.keys()
             }
 
+    if mask_file:
+        with temp_attr(Image, "MAX_IMAGE_PIXELS", None):
+            custom_mask = imread(mask_file)
+    else:
+        custom_mask = None
+
     convert.st.run(
         counts_data,
         image_data,
@@ -205,6 +267,7 @@ def _convert_st(
         annotation=annotation,
         scale_factor=scale,
         mask=mask,
+        custom_mask=custom_mask,
         rotate=rotate,
     )
 
@@ -217,10 +280,32 @@ def _convert_st(
 @click.option("--annotation", type=click.File("rb"))
 @click.option("--scale", type=float)
 @click.option("--mask/--no-mask", default=True)
+@click.option(
+    "--mask-file",
+    type=click.File("rb"),
+    help=" ".join(
+        [
+            "Custom mask.",
+            "Should be a single-channel image with the same size as the image.",
+            "Uses the following encoding: {}.".format(
+                ", ".join(
+                    sorted(
+                        [
+                            f"{cv.GC_BGD}=background",
+                            f"{cv.GC_PR_BGD}=likely background",
+                            f"{cv.GC_FGD}=foreground",
+                            f"{cv.GC_PR_FGD}=likely foreground",
+                        ]
+                    )
+                )
+            ),
+        ]
+    ),
+)
 @click.option("--rotate/--no-rotate", default=False)
 @_init
 def _convert_image(
-    image, annotation, scale, mask, rotate,
+    image, annotation, scale, mask, mask_file, rotate,
 ):
     r"""Converts image without any associated expression data"""
     with temp_attr(Image, "MAX_IMAGE_PIXELS", None):
@@ -232,12 +317,19 @@ def _convert_image(
                 k: annotation_file[k][()] for k in annotation_file.keys()
             }
 
+    if mask_file:
+        with temp_attr(Image, "MAX_IMAGE_PIXELS", None):
+            custom_mask = imread(mask_file)
+    else:
+        custom_mask = None
+
     convert.image.run(
         image_data,
         output_file="data.h5",
         annotation=annotation,
         scale_factor=scale,
         mask=mask,
+        custom_mask=custom_mask,
         rotate=rotate,
     )