Backport PR #2414 on branch 1.9.x (matplotlib 3.7 compat) (#2419)
* Backport PR #2414: matplotlib 3.7 compat

* Fix scrublet numpy matrix compatibility

* Update visium default plot for matplotlib 3.7

* Update hashsolo docstrings

* Skip plotting test that changed on mpl 3.7 if mpl < 3.7 is installed

* Fix hashsolo docs (again)

* Update anndata-dev tests to install anndata test deps

* Temporarily set warnings as errors to False for doc builds

* Release notes

* Fix using custom layer with highly_variable_genes (#2302)

* Fix using custom layer with highly_variable_genes

* Add tests

* Add release note

* Move release note to correct section

* Format release notes

* Add check for number of normalized dispersions (#2231)

* Add check for number of normalized dispersions

In sc.pp.highly_variable_genes(), when flavor='cell_ranger' and n_top_genes is set,
check that enough normalized dispersions have been calculated; if not, raise a warning
and set n_top_genes to the number of calculated dispersions.

Fixes #2230

* Use .size instead of len()

* Add test for n_top_genes warning

* Add release note

* Remove blank line

Co-authored-by: Isaac Virshup <[email protected]>

---------

Co-authored-by: Isaac Virshup <[email protected]>
Co-authored-by: adamgayoso <[email protected]>
Co-authored-by: Dries Schaumont <[email protected]>
Co-authored-by: Luke Zappia <[email protected]>
5 people authored Feb 16, 2023
1 parent 97c2617 commit 1fbbfcd
Showing 10 changed files with 66 additions and 19 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines.yml
@@ -50,7 +50,7 @@ jobs:
     displayName: 'Install dependencies'
   - script: |
-      pip install -v git+https://github.com/scverse/anndata
+      pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"
     displayName: 'Install development anndata'
     condition: eq(variables['ANNDATA_DEV'], 'yes')
4 changes: 3 additions & 1 deletion docs/conf.py
@@ -128,7 +128,9 @@


 def setup(app):
-    app.warningiserror = on_rtd
+    app.warningiserror = (
+        False  # Temporarily disable warnings as errors to get 1.9.2 out
+    )


 # -- Options for other output formats ------------------------------------------
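For reference, a hedged sketch of the convention the removed line relied on: the `on_rtd` flag is typically derived from the READTHEDOCS environment variable, so warnings are treated as errors only on Read the Docs builds (assumed here, not shown in this diff):

import os

# Assumed convention (not part of this diff): detect Read the Docs builds.
on_rtd = os.environ.get('READTHEDOCS') == 'True'

def setup(app):
    # Previous behavior: fail the docs build on warnings only when building on RTD.
    app.warningiserror = on_rtd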
12 changes: 5 additions & 7 deletions docs/release-notes/1.9.2.md
@@ -1,11 +1,9 @@
-### 1.9.2 {small}`the future`
-
-
-```{rubric} Documentation
-```
+### 1.9.2 {small}`2023-02-16`
 
 ```{rubric} Bug fixes
 ```
 
-```{rubric} Performance
-```
+* {func}`~scanpy.pp.highly_variable_genes` `layer` argument now works in tandem with `batches` {pr}`2302` {smaller}`D Schaumont`
+* {func}`~scanpy.pp.highly_variable_genes` with `flavor='cell_ranger'` now handles the case in {issue}`2230` where the number of calculated dispersions is less than `n_top_genes` {pr}`2231` {smaller}`L Zappia`
+* Fix compatibility with matplotlib 3.7 {pr}`2414` {smaller}`I Virshup` {smaller}`P Fisher`
+* Fix scrublet numpy matrix compatibility issue {pr}`2395` {smaller}`A Gayoso`
17 changes: 9 additions & 8 deletions scanpy/external/pp/_hashsolo.py
@@ -25,7 +25,8 @@


 def _calculate_log_likelihoods(data, number_of_noise_barcodes):
-    """Calculate log likelihoods for each hypothesis, negative, singlet, doublet
+    """\
+    Calculate log likelihoods for each hypothesis, negative, singlet, doublet
     Parameters
     ----------
@@ -43,8 +44,8 @@ def _calculate_log_likelihoods(data, number_of_noise_barcodes):
"""

def gaussian_updates(data, mu_o, std_o):
"""Update parameters of your gaussian
https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
"""\
Update parameters of your gaussian https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
Parameters
----------
@@ -210,7 +211,7 @@ def gaussian_updates(data, mu_o, std_o):


 def _calculate_bayes_rule(data, priors, number_of_noise_barcodes):
-    """
+    """\
     Calculate bayes rule from log likelihoods
     Parameters
@@ -263,7 +264,8 @@ def hashsolo(
     number_of_noise_barcodes: int = None,
     inplace: bool = True,
 ):
-    """Probabilistic demultiplexing of cell hashing data using HashSolo [Bernstein20]_.
+    """\
+    Probabilistic demultiplexing of cell hashing data using HashSolo [Bernstein20]_.
     .. note::
         More information and bug reports `here <https://github.com/calico/solo>`__.
@@ -294,9 +296,8 @@ def hashsolo(
     Returns
     -------
     adata
-        if inplace is False returns AnnData with demultiplexing results
-        in .obs attribute otherwise does is in place
-
+        if inplace is False returns AnnData with demultiplexing results
+        in .obs attribute otherwise does is in place
 
     Examples
     -------
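For context, a brief usage sketch of hashsolo (the input file and hashing column names below are hypothetical; with inplace=True the demultiplexing results are written to .obs):

import anndata
import scanpy.external as sce

adata = anndata.read_h5ad("hashed_cells.h5ad")  # hypothetical file with hashing counts in .obs

# .obs columns holding counts for each hashing barcode (names are made up here)
sce.pp.hashsolo(adata, ['Hash1', 'Hash2', 'Hash3'])

# Per-cell demultiplexing results, e.g. the most likely hypothesis, now live in .obs
print(adata.obs.head())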
5 changes: 5 additions & 0 deletions scanpy/external/pp/_scrublet.py
@@ -431,6 +431,11 @@ def _scrublet_call_doublets(

     if mean_center:
         logg.info('Embedding transcriptomes using PCA...')
+        # Sklearn PCA doesn't like matrices, so convert to arrays
+        if isinstance(scrub._E_obs_norm, np.matrix):
+            scrub._E_obs_norm = np.asarray(scrub._E_obs_norm)
+        if isinstance(scrub._E_sim_norm, np.matrix):
+            scrub._E_sim_norm = np.asarray(scrub._E_sim_norm)
         sl.pipeline_pca(
             scrub, n_prin_comps=n_prin_comps, random_state=scrub.random_state
         )
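As background, a minimal sketch of the incompatibility being worked around (illustrative only): recent scikit-learn releases reject np.matrix inputs, which is the type scipy's sparse .todense() returns.

import numpy as np
from sklearn.decomposition import PCA

m = np.matrix(np.random.rand(50, 10))  # same type as scipy.sparse .todense() output

# PCA(n_components=5).fit(m) raises a TypeError on recent scikit-learn versions,
# so convert to a plain ndarray first -- exactly what the lines above now do.
X = np.asarray(m)
PCA(n_components=5).fit(X)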
2 changes: 1 addition & 1 deletion scanpy/plotting/_utils.py
@@ -32,7 +32,7 @@
 VBound = Union[str, float, Callable[[Sequence[float]], float]]


-class _AxesSubplot(Axes, axes.SubplotBase, ABC):
+class _AxesSubplot(Axes, axes.SubplotBase):
     """Intersection between Axes and SubplotBase: Has methods of both"""


7 changes: 7 additions & 0 deletions scanpy/preprocessing/_highly_variable_genes.py
@@ -265,6 +265,12 @@ def _highly_variable_genes_single_batch(
         if n_top_genes > adata.n_vars:
             logg.info('`n_top_genes` > `adata.n_var`, returning all genes.')
             n_top_genes = adata.n_vars
+        if n_top_genes > dispersion_norm.size:
+            warnings.warn(
+                '`n_top_genes` > number of normalized dispersions, returning all genes with normalized dispersions.',
+                UserWarning,
+            )
+            n_top_genes = dispersion_norm.size
         disp_cut_off = dispersion_norm[n_top_genes - 1]
         gene_subset = np.nan_to_num(df['dispersions_norm'].values) >= disp_cut_off
         logg.debug(
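For illustration, a hedged sketch of the new behavior (it mirrors the test added further down; the toy data and sizes are arbitrary):

import numpy as np
import pytest
import scanpy as sc

# Toy data: 100 cells x 30 genes, so far fewer than 1000 normalized dispersions exist.
X = np.random.poisson(2, (100, 30))
adata = sc.AnnData(X, dtype=X.dtype)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# With flavor='cell_ranger' and an oversized n_top_genes, the check above warns
# and falls back to the number of calculated normalized dispersions.
with pytest.warns(UserWarning, match="number of normalized dispersions"):
    sc.pp.highly_variable_genes(adata, n_top_genes=1000, flavor="cell_ranger")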
@@ -458,6 +464,7 @@ def highly_variable_genes(

         hvg = _highly_variable_genes_single_batch(
             adata_subset,
+            layer=layer,
             min_disp=min_disp,
             max_disp=max_disp,
             min_mean=min_mean,
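And a short usage sketch of the layer fix (again mirroring the new test below; the shuffled layer just makes the result differ from .X):

import numpy as np
import scanpy as sc

adata = sc.datasets.blobs()
new_layer = adata.X.copy()
np.random.shuffle(new_layer)  # make the layer differ from .X
adata.layers['test_layer'] = new_layer
adata.obs['batch'] = np.random.binomial(4, 0.5, size=adata.n_obs)
adata.obs['batch'] = adata.obs['batch'].astype('category')

# With this fix, the per-batch computation reads from the requested layer
# instead of silently using .X.
sc.pp.highly_variable_genes(adata, batch_key='batch', n_top_genes=3, layer='test_layer')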
Binary file modified scanpy/tests/_images/master_spatial_visium_default.png
6 changes: 6 additions & 0 deletions scanpy/tests/test_embedding_plots.py
@@ -1,6 +1,7 @@
 from functools import partial
 from pathlib import Path
 
+import matplotlib as mpl
 import matplotlib.pyplot as plt
 from matplotlib.colors import Normalize
 from matplotlib.testing.compare import compare_images
@@ -304,6 +305,11 @@ def test_visium_circles(image_comparer):  # standard visium data


 def test_visium_default(image_comparer):  # default values
+    from packaging.version import parse as parse_version
+
+    if parse_version(mpl.__version__) < parse_version("3.7.0"):
+        pytest.xfail("Matplotlib 3.7.0+ required for this test")
+
     save_and_compare_images = image_comparer(ROOT, FIGS, tol=5)
     adata = sc.read_visium(HERE / '_data' / 'visium_data' / '1.0.0')
     adata.obs = adata.obs.astype({'array_row': 'str'})
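As an aside, a hedged sketch of an equivalent way to gate such a test on the matplotlib version, using a skip marker instead of an in-test xfail (illustrative only, not part of this diff; the test name is hypothetical):

import matplotlib as mpl
import pytest
from packaging.version import parse as parse_version

needs_mpl_37 = pytest.mark.skipif(
    parse_version(mpl.__version__) < parse_version("3.7.0"),
    reason="baseline image was regenerated with matplotlib 3.7",
)

@needs_mpl_37
def test_visium_default_marker_variant(image_comparer):
    ...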
30 changes: 29 additions & 1 deletion scanpy/tests/test_highly_variable_genes.py
@@ -36,11 +36,26 @@ def test_highly_variable_genes_basic():
     assert 'highly_variable_intersection' in adata.var.columns
 
     adata = sc.datasets.blobs()
-    adata.obs['batch'] = np.random.binomial(4, 0.5, size=(adata.n_obs))
+    batch = np.random.binomial(4, 0.5, size=(adata.n_obs))
+    adata.obs['batch'] = batch
     adata.obs['batch'] = adata.obs['batch'].astype('category')
     sc.pp.highly_variable_genes(adata, batch_key='batch', n_top_genes=3)
     assert 'highly_variable_nbatches' in adata.var.columns
     assert adata.var['highly_variable'].sum() == 3
+    highly_var_first_layer = adata.var['highly_variable']
+
+    adata = sc.datasets.blobs()
+    new_layer = adata.X.copy()
+    np.random.shuffle(new_layer)
+    adata.layers['test_layer'] = new_layer
+    adata.obs['batch'] = batch
+    adata.obs['batch'] = adata.obs['batch'].astype('category')
+    sc.pp.highly_variable_genes(
+        adata, batch_key='batch', n_top_genes=3, layer='test_layer'
+    )
+    assert 'highly_variable_nbatches' in adata.var.columns
+    assert adata.var['highly_variable'].sum() == 3
+    assert (highly_var_first_layer != adata.var['highly_variable']).any()
 
     sc.pp.highly_variable_genes(adata)
     no_batch_hvg = adata.var.highly_variable.copy()
@@ -491,3 +506,16 @@ def test_seurat_v3_mean_var_output_with_batchkey():
     )
     np.testing.assert_allclose(true_mean, result_df['means'], rtol=2e-05, atol=2e-05)
     np.testing.assert_allclose(true_var, result_df['variances'], rtol=2e-05, atol=2e-05)
+
+
+def test_cellranger_n_top_genes_warning():
+    X = np.random.poisson(2, (100, 30))
+    adata = sc.AnnData(X, dtype=X.dtype)
+    sc.pp.normalize_total(adata)
+    sc.pp.log1p(adata)
+
+    with pytest.warns(
+        UserWarning,
+        match="`n_top_genes` > number of normalized dispersions, returning all genes with normalized dispersions.",
+    ):
+        sc.pp.highly_variable_genes(adata, n_top_genes=1000, flavor="cell_ranger")