scimilarity.zarr_dataset#
- class scimilarity.zarr_dataset.ZarrDataset(store_path, mode='r')[source]#
Bases:
object
A class that reads and manipulates zarr datasets saved by AnnData from disk. Adapted from https://github.com/lilab-bcb/backedarray
- Parameters:
store_path (str) –
mode (str) –
- append_X(matrix, axis=None)[source]#
- Append to the X sparse matrix.
Only row-wise concatenation for csr_matrix. Only col-wise concatenation for csc_matrix.
- Parameters:
matrix (csr_matrix, csc_matrix) – The sparse matrix.
axis (Optional[int]) –
Examples
>>> zarr_data.append_X(matrix)
- append_annotation(annotation, df)[source]#
Append annotation (i.e. obs, var) from a dataframe.
- Parameters:
annotation (str,) – Annotation name (i.e. obs, var).
df (pandas.DataFrame) –
Examples
>>> zarr_data.append_annotation("obs", df)
- append_matrix(group, matrix, axis=None)[source]#
Append a sparse matrix for a zarr group.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
matrix (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix) – A sparse matrix.
axis (Optional[int]) –
Examples
>>> zarr_data.append_matrix(group, matrix)
- col_slice_csc(group, idx)[source]#
Column slice a sparse csc matrix.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
idx (int) – Numerical index of the column.
- Returns:
Sparse csc matrix slice for one column.
- Return type:
scipy.sparse.csc_matrix
Examples
>>> zarr_data.col_slice_csc(group, 42)
- property dataset_info: Dict[str, list]#
Get a summary of the dataset info.
- Returns:
d – A dict containing information on the content of the dataset, such as keys in the various object attributes.
- Return type:
dict
Examples
>>> zarr_data.dataset_info
- get_X(in_mem=False)[source]#
Get the X matrix backed by zarr storage.
- Parameters:
in_mem (bool, default: False) – Return the full matrix in memory rather than a reference to zarr group.
- Returns:
The sparse X matrix.
- Return type:
scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_X()
- get_annotation_column(group, column)[source]#
Get an annotation column for a zarr group.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
column (str) – The column name.
- Returns:
The annotation column data, as a pandas categorical series if the data is categorical, otherwise as a numpy ndarray.
- Return type:
numpy.ndarray, pandas.Categorical
Examples
>>> zarr_data.get_annotation_column(group, "sample")
- get_annotation_index(group)[source]#
Get the annotation index for a zarr group.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
- Returns:
The annotation index.
- Return type:
pandas.Index
Examples
>>> zarr_data.get_annotation_index(group)
- get_cell(idx)[source]#
Get gene expression data for one cell row as sparse matrix.
- Parameters:
idx (int,) – Numerical index of the cell.
- Returns:
Cell row data as sparse matrix.
- Return type:
scipy.sparse.csr_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_cell(42)
- get_col(group, idx)[source]#
Get sparse column data as sparse matrix.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group
idx (int) – Numerical index of the column.
- Returns:
Column data as sparse matrix.
- Return type:
scipy.sparse.csc_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_col(group, 42)
- get_counts(in_mem=False)[source]#
Get the count matrix backed by zarr storage.
- Parameters:
in_mem (bool, default: False) – Return the full matrix in memory rather than a reference to zarr group.
- Returns:
The sparse count matrix.
- Return type:
scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_counts()
- get_gene(idx)[source]#
Get gene expression data for one gene column as sparse matrix.
- Parameters:
idx (int,) – Numerical index of the gene.
- Returns:
Gene column data as sparse matrix.
- Return type:
scipy.sparse.csc_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_gene(42)
- get_layer_cell(layer_key, idx)[source]#
Get data for one cell row from a layer as sparse matrix.
- Parameters:
idx (int,) – Numerical index of the cell.
layer_key (str) – The layer name.
- Returns:
Cell row data as sparse matrix.
- Return type:
scipy.sparse.csr_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_layer_cell(layer_key, 42)
- get_layer_gene(layer_key, idx)[source]#
Get data for one gene column from a layer as sparse matrix.
- Parameters:
layer_key (str) – The layer name.
idx (int) – Numerical index of the gene.
- Returns:
Gene column data as sparse matrix.
- Return type:
scipy.sparse.csc_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_layer_gene(layer_key, 42)
- get_matrix(group, in_mem=False)[source]#
Get the sparse matrix from zarr group.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
in_mem (bool, default: False) – Return the full matrix in memory rather than a reference to zarr group.
- Returns:
Sparse matrix.
- Return type:
scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_matrix(group)
- get_obs(column)[source]#
Get data.obs[column] data.
- Parameters:
column (str,) – Column name in obs.
- Returns:
A pandas series containing the obs data.
- Return type:
pandas.Series
Examples
>>> zarr_data.get_obs("celltype_name")
- get_row(group, idx)[source]#
Get sparse row data as sparse matrix.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
idx (int) – Numerical index of the row.
- Returns:
Row data as sparse matrix.
- Return type:
scipy.sparse.csr_matrix, scipy.sparse.coo_matrix
Examples
>>> zarr_data.get_row(group, 42)
- get_uns(key)[source]#
Get data.uns[key] data.
- Parameters:
key (str,) – Key for the field in uns.
- Returns:
The data in data.uns[key] in the format it was stored as.
- Return type:
object
Examples
>>> zarr_data.get_uns("orig_genes")
- get_var(column)[source]#
Get data.var[column] data.
- Parameters:
column (str,) – Column name in var.
- Returns:
A pandas series containing the var data.
- Return type:
pandas.Series
Examples
>>> zarr_data.get_var("symbol")
- property obs: pandas.DataFrame#
Get the obs dataframe.
- Returns:
A pandas dataframe containing the obs data.
- Return type:
pandas.DataFrame
Examples
>>> zarr_data.obs
- property obs_index: pandas.Index#
Get the obs index.
- Returns:
A pandas Index containing the obs index.
- Return type:
pandas.Index
Examples
>>> zarr_data.obs_index
- row_slice_csr(group, idx)[source]#
Row slice a sparse csr matrix.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
idx (int) – Numerical index of the row.
- Returns:
Sparse csr matrix slice for one row.
- Return type:
scipy.sparse.csr_matrix
Examples
>>> zarr_data.row_slice_csr(group, 42)
- set_X(matrix)[source]#
- Set the X sparse matrix.
This will overwrite the current stored X.
- Parameters:
matrix (csr_matrix, csc_matrix, coo_matrix) – The sparse matrix.
Examples
>>> zarr_data.set_X(matrix)
- set_annotation(annotation, df)[source]#
- Store annotation (i.e. obs, var) from a dataframe.
This will overwrite the current data.
- Parameters:
annotation (str,) – Annotation name (i.e. obs, var).
df (pandas.DataFrame) –
Examples
>>> zarr_data.set_annotation("obs", df)
- set_matrix(group, matrix)[source]#
- Set the sparse matrix for a zarr group.
This will overwrite the current data.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
matrix (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix, scipy.sparse.coo_matrix) – A sparse matrix.
Examples
>>> zarr_data.set_matrix(group, matrix)
- property shape: Tuple[int, int]#
Get the shape of the gene expression matrix.
- Returns:
A tuple of the form (nrows, ncolumns).
- Return type:
Tuple[int, int]
Examples
>>> zarr_data.shape
- slice_across(group, idx)[source]#
Slice a sparse matrix across its directional specification, i.e. column-wise for csr, row-wise for csc. This can be slow for large matrices.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
idx (int) – Numerical index of the column (for csr) or row (for csc) to slice.
- Returns:
data (numpy.ndarray) – Sparse matrix data list.
indices (numpy.ndarray) – Sparse matrix indices.
indptr (numpy.ndarray) – Sparse matrix indptr.
- Return type:
Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]
Examples
>>> zarr_data.slice_across(group, 42)
- slice_coo(group, idx, axis)[source]#
Slice a sparse coo matrix.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
idx (int) – Numerical index of the row or column to slice, depending on axis.
axis (int) – The axis along which to slice.
- Returns:
Sparse coo matrix sliced for one row or column.
- Return type:
scipy.sparse.coo_matrix
Examples
>>> zarr_data.slice_coo(group, 42, 0)
- slice_with(group, idx)[source]#
Slice a sparse matrix with its directional specification, i.e. row-wise for csr, column-wise for csc.
- Parameters:
group (zarr.hierarchy.Group) – A zarr group.
idx (int) – Numerical index of the row (for csr) or column (for csc) to slice.
- Returns:
data (numpy.ndarray) – Sparse matrix data list.
indices (numpy.ndarray) – Sparse matrix indices.
indptr (numpy.ndarray) – Sparse matrix indptr.
- Return type:
Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]
Examples
>>> zarr_data.slice_with(group, 42)
- property var: pandas.DataFrame#
Get the var dataframe.
- Returns:
A pandas dataframe containing the var data.
- Return type:
pandas.DataFrame
Examples
>>> zarr_data.var
- property var_index: pandas.Index#
Get the var index.
- Returns:
var_index – A pandas Index containing the var index.
- Return type:
pandas.Index
Examples
>>> zarr_data.var_index