Skip to content

Commit

Permalink
Backport of Prepare for pandas 2.0 (#2434) onto 1.9.x (#2435)
Browse files Browse the repository at this point in the history
* Prepare for pandas 2.0 (#2434)

* is_categorical -> is_categorical_dtype

* cat.replace(to_remove, np.nan) -> cat.remove_categories(to_remove)

* df1.append(df2) -> pd.concat([df1, df2])

* Series.iteritems -> Series.items

* Fix indexing a pandas object with a set in score genes

* Release notes

(cherry picked from commit 0692ef9)

* Fix anndata-dev test

* anndata-dev compatibility

* Prep 1.9.3

* Fix pr reference
  • Loading branch information
ivirshup authored Mar 2, 2023
1 parent 1fbbfcd commit ed3b277
Show file tree
Hide file tree
Showing 11 changed files with 31 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
displayName: 'Install dependencies'
- script: |
'pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"'
pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"
displayName: 'Install development anndata'
condition: eq(variables['ANNDATA_DEV'], 'yes')
Expand Down
7 changes: 7 additions & 0 deletions docs/release-notes/1.9.3.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
### 1.9.3 {small}`2023-03-02`

```{rubric} Bug fixes
```

* Variety of fixes against pandas 2.0.0rc0 {pr}`2434` {smaller}`I Virshup`
* Compatibility with anndata 0.9.0rc {pr}`2435` {smaller}`I Virshup`
2 changes: 1 addition & 1 deletion scanpy/datasets/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def paul15() -> ad.AnnData:
clusters = f['cluster.id'][()].flatten().astype(int)
infogenes_names = f['info.genes_strings'][()].astype(str)
# each row has to correspond to an observation, therefore transpose
adata = ad.AnnData(X.transpose(), dtype=X.dtype)
adata = ad.AnnData(X.transpose(), dtype=np.float32)
adata.var_names = gene_names
adata.row_names = cell_names
# names reflecting the cell type identifications from the paper
Expand Down
10 changes: 5 additions & 5 deletions scanpy/external/exporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import h5py
import matplotlib.pyplot as plt
from anndata import AnnData
from pandas.api.types import is_categorical
from pandas.api.types import is_categorical_dtype

from ..preprocessing._utils import _get_mean_var
from .._utils import NeighborsView
Expand Down Expand Up @@ -148,31 +148,31 @@ def spring_project(
continuous_extras = {}
if cell_groupings is None:
for obs_name in adata.obs:
if is_categorical(adata.obs[obs_name]):
if is_categorical_dtype(adata.obs[obs_name]):
categorical_extras[obs_name] = [str(x) for x in adata.obs[obs_name]]
else:
if isinstance(cell_groupings, str):
cell_groupings = [cell_groupings]
for obs_name in cell_groupings:
if obs_name not in adata.obs:
logg.warning(f'Cell grouping {obs_name!r} is not in adata.obs')
elif is_categorical(adata.obs[obs_name]):
elif is_categorical_dtype(adata.obs[obs_name]):
categorical_extras[obs_name] = [str(x) for x in adata.obs[obs_name]]
else:
logg.warning(
f'Cell grouping {obs_name!r} is not a categorical variable'
)
if custom_color_tracks is None:
for obs_name in adata.obs:
if not is_categorical(adata.obs[obs_name]):
if not is_categorical_dtype(adata.obs[obs_name]):
continuous_extras[obs_name] = np.array(adata.obs[obs_name])
else:
if isinstance(custom_color_tracks, str):
custom_color_tracks = [custom_color_tracks]
for obs_name in custom_color_tracks:
if obs_name not in adata.obs:
logg.warning(f'Custom color track {obs_name!r} is not in adata.obs')
elif not is_categorical(adata.obs[obs_name]):
elif not is_categorical_dtype(adata.obs[obs_name]):
continuous_extras[obs_name] = np.array(adata.obs[obs_name])
else:
logg.warning(
Expand Down
2 changes: 1 addition & 1 deletion scanpy/plotting/_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2411,7 +2411,7 @@ def _plot_categories_as_colorblocks(
labels = []
label2code = {} # dictionary of numerical values assigned to each label
for code, (label, value) in enumerate(
obs_tidy.index.value_counts(sort=False).iteritems()
obs_tidy.index.value_counts(sort=False).items()
):
ticks.append(value_sum + (value / 2))
labels.append(label)
Expand Down
2 changes: 1 addition & 1 deletion scanpy/plotting/_tools/scatterplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,7 @@ def _get_color_source_vector(
else:
values = adata.obs_vector(value_to_plot, layer=layer)
if groups and is_categorical_dtype(values):
values = values.replace(values.categories.difference(groups), np.nan)
values = values.remove_categories(values.categories.difference(groups))
return values


Expand Down
2 changes: 1 addition & 1 deletion scanpy/preprocessing/_highly_variable_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ def highly_variable_genes(
missing_hvg['highly_variable'] = missing_hvg['highly_variable'].astype(bool)
missing_hvg['gene'] = gene_list[~filt]
hvg['gene'] = adata_subset.var_names.values
hvg = hvg.append(missing_hvg, ignore_index=True)
hvg = pd.concat([hvg, missing_hvg], ignore_index=True)

# Order as before filtering
idxs = np.concatenate((np.where(filt)[0], np.where(~filt)[0]))
Expand Down
1 change: 1 addition & 0 deletions scanpy/tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def test_repeated_gene_symbols():
adata = sc.AnnData(
np.arange(3 * 4).reshape((3, 4)),
var=pd.DataFrame({"gene_symbols": gene_symbols}, index=var_names),
dtype=np.float32,
)

with pytest.raises(KeyError, match="symbol_b"):
Expand Down
5 changes: 3 additions & 2 deletions scanpy/tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
[7.0, 9.4, 6.8, 9.1, 8.0],
[8.9, 8.6, 9.6, 1.0, 2.0],
[6.5, 8.9, 2.2, 4.5, 8.9],
]
],
dtype=np.float32,
)

T = np.array([[2.0, 3.5, 4.0, 1.0, 4.7], [3.2, 2.0, 5.0, 5.0, 8.0]])
T = np.array([[2.0, 3.5, 4.0, 1.0, 4.7], [3.2, 2.0, 5.0, 5.0, 8.0]], dtype=np.float32)


@pytest.fixture
Expand Down
17 changes: 9 additions & 8 deletions scanpy/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@


def test_log1p(tmp_path):
A = np.random.rand(200, 10)
A = np.random.rand(200, 10).astype(np.float32)
A_l = np.log1p(A)
ad = AnnData(A)
ad2 = AnnData(A)
ad3 = AnnData(A)
ad = AnnData(A.copy())
ad2 = AnnData(A.copy())
ad3 = AnnData(A.copy())
ad3.filename = tmp_path / 'test.h5ad'
sc.pp.log1p(ad)
assert np.allclose(ad.X, A_l)
Expand Down Expand Up @@ -84,18 +84,19 @@ def test_mean_var_sparse():


def test_normalize_per_cell():
adata = AnnData(np.array([[1, 0], [3, 0], [5, 6]]))
A = np.array([[1, 0], [3, 0], [5, 6]], dtype=np.float32)
adata = AnnData(A.copy())
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1, key_n_counts='n_counts2')
assert adata.X.sum(axis=1).tolist() == [1.0, 1.0, 1.0]
# now with copy option
adata = AnnData(np.array([[1, 0], [3, 0], [5, 6]]))
adata = AnnData(A.copy())
# note that sc.pp.normalize_per_cell is also used in
# pl.highest_expr_genes with parameter counts_per_cell_after=100
adata_copy = sc.pp.normalize_per_cell(adata, counts_per_cell_after=1, copy=True)
assert adata_copy.X.sum(axis=1).tolist() == [1.0, 1.0, 1.0]
# now sparse
adata = AnnData(np.array([[1, 0], [3, 0], [5, 6]]))
adata_sparse = AnnData(sp.csr_matrix([[1, 0], [3, 0], [5, 6]]))
adata = AnnData(A.copy())
adata_sparse = AnnData(sp.csr_matrix(A.copy()))
sc.pp.normalize_per_cell(adata)
sc.pp.normalize_per_cell(adata_sparse)
assert adata.X.sum(axis=1).tolist() == adata_sparse.X.sum(axis=1).A1.tolist()
Expand Down
2 changes: 1 addition & 1 deletion scanpy/tools/_score_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def score_genes(
control_genes = set()

# now pick `ctrl_size` genes from every cut
for cut in np.unique(obs_cut.loc[gene_list]):
for cut in np.unique(obs_cut.loc[list(gene_list)]):
r_genes = np.array(obs_cut[obs_cut == cut].index)
np.random.shuffle(r_genes)
# uses full r_genes if ctrl_size > len(r_genes)
Expand Down

0 comments on commit ed3b277

Please sign in to comment.