scimilarity.zarr_dataset#

class scimilarity.zarr_dataset.ZarrDataset(store_path, mode='r')[source]#

Bases: object

A class that reads and manipulates zarr datasets saved by AnnData from disk. Adapted from https://github.com/lilab-bcb/backedarray

Parameters:
  • store_path (str) –

  • mode (str) –

append_X(matrix, axis=None)[source]#
Append to the X sparse matrix.

Only row-wise concatenation is supported for csr_matrix, and only column-wise concatenation for csc_matrix.

Parameters:
  • matrix (csr_matrix, csc_matrix) – The sparse matrix.

  • axis (Optional[int]) –

Examples

>>> zarr_data.append_X(matrix)
append_annotation(annotation, df)[source]#

Append annotation (i.e. obs, var) from a dataframe.

Parameters:
  • annotation (str,) – Annotation name (i.e. obs, var).

  • df (pandas.DataFrame) –

Examples

>>> zarr_data.append_annotation("obs", df)
append_matrix(group, matrix, axis=None)[source]#

Append a sparse matrix for a zarr group.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • matrix (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix) – A sparse matrix.

  • axis (Optional[int]) –

Examples

>>> zarr_data.append_matrix(group, matrix)
col_slice_csc(group, idx)[source]#

Column slice a sparse csc matrix.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int) – Numerical index of the column.

Returns:

Sparse csc matrix slice for one column.

Return type:

scipy.sparse.csc_matrix

Examples

>>> zarr_data.col_slice_csc(group, 42)
property dataset_info: Dict[str, list]#

Get a summary of the dataset info.

Returns:

d – A dict containing information on the content of the dataset, such as keys in the various object attributes.

Return type:

dict

Examples

>>> zarr_data.dataset_info
get_X(in_mem=False)[source]#

Get the X matrix backed by zarr storage.

Parameters:

in_mem (bool, default: False) – Return the full matrix in memory rather than a reference to zarr group.

Returns:

The sparse X matrix.

Return type:

scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_X()
get_annotation_column(group, column)[source]#

Get an annotation column for a zarr group.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • column (str) – The column name.

Returns:

The annotation column data, as a pandas categorical series if the data is categorical, otherwise as a numpy ndarray.

Return type:

numpy.ndarray, pandas.Categorical

Examples

>>> zarr_data.get_annotation_column(group, "sample")
get_annotation_index(group)[source]#

Get the annotation index for a zarr group.

Parameters:

group (zarr.hierarchy.Group) – A zarr group.

Returns:

The annotation index.

Return type:

pandas.Index

Examples

>>> zarr_data.get_annotation_index(group)
get_cell(idx)[source]#

Get gene expression data for one cell row as sparse matrix.

Parameters:

idx (int,) – Numerical index of the cell.

Returns:

Cell row data as sparse matrix.

Return type:

scipy.sparse.csr_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_cell(42)
get_col(group, idx)[source]#

Get sparse column data as sparse matrix.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int) – Numerical index of the column.

Returns:

Column data as sparse matrix.

Return type:

scipy.sparse.csc_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_col(group, 42)
get_counts(in_mem=False)[source]#

Get the count matrix backed by zarr storage.

Parameters:

in_mem (bool, default: False) – Return the full matrix in memory rather than a reference to zarr group.

Returns:

The sparse count matrix.

Return type:

scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_counts()
get_gene(idx)[source]#

Get gene expression data for one gene column as sparse matrix.

Parameters:

idx (int,) – Numerical index of the gene.

Returns:

Gene column data as sparse matrix.

Return type:

scipy.sparse.csc_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_gene(42)
get_layer_cell(layer_key, idx)[source]#

Get data for one cell row from a layer as sparse matrix.

Parameters:
  • idx (int) – Numerical index of the cell.

  • layer_key (str) – The layer name.

Returns:

Cell row data as sparse matrix.

Return type:

scipy.sparse.csr_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_layer_cell("counts", 42)
get_layer_gene(layer_key, idx)[source]#

Get data for one gene column from a layer as sparse matrix.

Parameters:
  • layer_key (str) – The layer name.

  • idx (int) – Numerical index of the gene.

Returns:

Gene column data as sparse matrix.

Return type:

scipy.sparse.csc_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_layer_gene("counts", 42)
get_matrix(group, in_mem=False)[source]#

Get the sparse matrix from zarr group.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • in_mem (bool, default: False) – Return the full matrix in memory rather than a reference to zarr group.

Returns:

Sparse matrix.

Return type:

scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_matrix(group)
get_obs(column)[source]#

Get data.obs[column] data.

Parameters:

column (str,) – Column name in obs.

Returns:

A pandas series containing the obs data.

Return type:

pandas.Series

Examples

>>> zarr_data.get_obs("celltype_name")
get_row(group, idx)[source]#

Get sparse row data as sparse matrix.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int,) – Numerical index of the cell.

Returns:

Row data as sparse matrix.

Return type:

scipy.sparse.csr_matrix, scipy.sparse.coo_matrix

Examples

>>> zarr_data.get_row(group, 42)
get_uns(key)[source]#

Get data.uns[key] data.

Parameters:

key (str,) – Key for the field in uns.

Returns:

The data in data.uns[key] in the format it was stored as.

Return type:

object

Examples

>>> zarr_data.get_uns("orig_genes")
get_var(column)[source]#

Get data.var[column] data.

Parameters:

column (str,) – Column name in var.

Returns:

A pandas series containing the var data.

Return type:

pandas.Series

Examples

>>> zarr_data.get_var("symbol")
property obs: pandas.DataFrame#

Get the obs dataframe.

Returns:

A pandas dataframe containing the obs data.

Return type:

pandas.DataFrame

Examples

>>> zarr_data.obs
property obs_index: pandas.Index#

Get the obs index.

Returns:

A pandas Index containing the obs index.

Return type:

pandas.Index

Examples

>>> zarr_data.obs_index
row_slice_csr(group, idx)[source]#

Row slice a sparse csr matrix.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int,) – Numerical index of the cell.

Returns:

Sparse csr matrix slice for one row.

Return type:

scipy.sparse.csr_matrix

Examples

>>> zarr_data.row_slice_csr(group, 42)
set_X(matrix)[source]#
Set the X sparse matrix.

This will overwrite the current stored X.

Parameters:

matrix (csr_matrix, csc_matrix, coo_matrix) – The sparse matrix.

Examples

>>> zarr_data.set_X(matrix)
set_annotation(annotation, df)[source]#
Store annotation (i.e. obs, var) from a dataframe.

This will overwrite the current data.

Parameters:
  • annotation (str,) – Annotation name (i.e. obs, var).

  • df (pandas.DataFrame) –

Examples

>>> zarr_data.set_annotation("obs", df)
set_matrix(group, matrix)[source]#
Set the sparse matrix for a zarr group.

This will overwrite the current data.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • matrix (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix) – A sparse matrix.

Examples

>>> zarr_data.set_matrix(group, matrix)
property shape: Tuple[int, int]#

Get the shape of the gene expression matrix.

Returns:

A tuple of the form (nrows, ncolumns).

Return type:

Tuple[int, int]

Examples

>>> zarr_data.shape
slice_across(group, idx)[source]#

Slice a sparse matrix, across its directional specification. i.e. column-wise for csr, row-wise for csc. This can be slow for large matrices.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int,) – Numerical index of the cell.

Returns:

  • data (numpy.ndarray) – Sparse matrix data list.

  • indices (numpy.ndarray) – Sparse matrix indices.

  • indptr (numpy.ndarray) – Sparse matrix indptr.

Return type:

Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]

Examples

>>> zarr_data.slice_across(group, 42)
slice_coo(group, idx, axis)[source]#

Slice a sparse coo matrix.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int,) – Numerical index of the cell.

  • axis (int) – The axis along which to slice.

Returns:

Sparse coo matrix sliced for one row or column.

Return type:

scipy.sparse.coo_matrix

Examples

>>> zarr_data.slice_coo(group, 42, 0)
slice_with(group, idx)[source]#

Slice a sparse matrix, with its directional specification. i.e. row-wise for csr, column-wise for csc.

Parameters:
  • group (zarr.hierarchy.Group) – A zarr group.

  • idx (int,) – Numerical index of the cell.

Returns:

  • data (numpy.ndarray) – Sparse matrix data list.

  • indices (numpy.ndarray) – Sparse matrix indices.

  • indptr (numpy.ndarray) – Sparse matrix indptr.

Return type:

Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]

Examples

>>> zarr_data.slice_with(group, 42)
property var: pandas.DataFrame#

Get the var dataframe.

Returns:

A pandas dataframe containing the var data.

Return type:

pandas.DataFrame

Examples

>>> zarr_data.var
property var_index: pandas.Index#

Get the var index.

Returns:

var_index – A pandas Index containing the var index.

Return type:

pandas.Index

Examples

>>> zarr_data.var_index