Isosceles 0.2.1
This vignette demonstrates how Isosceles can be used to identify isoforms and
PSI events showing differences in expression along or between different cell
lineages or trajectories. The analysis is based on mouse E18 brain scRNA-Seq
data from the Lebrigand et al. 2020 paper - all the details regarding how the
data was processed can be found in the mouse_E18_analysis
directory of the
Isosceles_Paper repository.
Load the required packages:
library(Isosceles)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(scuttle)
library(DEXSeq)
library(RColorBrewer)
library(pheatmap)
library(dittoSeq)
library(Nebulosa)
Load the Isosceles TCC SummarizedExperiment object:
se_tcc <- readRDS(system.file(
"extdata", "se_tcc_mouse_e18.rds",
package = "Isosceles"
))
Load the processed SingleCellExperiment objects (containing normalized data, UMAP coordinates etc.):
sce_gene <- readRDS(system.file(
"extdata", "sce_gene_mouse_e18.rds",
package = "Isosceles"
))
sce_transcript <- readRDS(system.file(
"extdata", "sce_transcript_mouse_e18.rds",
package = "Isosceles"
))
sce_psi <- readRDS(system.file(
"extdata", "sce_psi_mouse_e18.rds",
package = "Isosceles"
))
Load the matrix pf pseudotime values for individual trajectories calculated using the Slingshot package:
pseudotime_matrix <- readRDS(system.file(
"extdata", "pseudotime_matrix_mouse_e18.rds", package = "Isosceles"
))
head(pseudotime_matrix)
## glut_1 glut_2 gaba rad_glia cyc_rad_glia cr
## 951_cells.AACTCAGAGCGTGAAC NA NA NA NA 30.40207 NA
## 951_cells.AACTCCCTCCGAGCCA NA NA 69.05360 NA NA NA
## 951_cells.AACTCTTTCTTTACGT NA NA 69.25377 NA NA NA
## 951_cells.AAGACCTGTGTGCGTC NA NA 70.40337 NA NA NA
## 951_cells.AAGACCTTCGGTTAAC NA NA 73.85786 NA NA NA
## 951_cells.ACACTGAGTCTCCACT 7.096513 6.642328 NA NA NA NA
This dataset contains the following cell types and trajectories, shown on the UMAP plot below:
cluster_colors <- c(
glut.4 = brewer.pal(n = 7, name = "YlGnBu")[3],
glut.8 = brewer.pal(n = 7, name = "YlGnBu")[4],
glut.5 = brewer.pal(n = 7, name = "YlGnBu")[5],
glut.7 = brewer.pal(n = 7, name = "YlGnBu")[6],
glut.11 = brewer.pal(n = 7, name = "YlGn")[5],
glut.9 = brewer.pal(n = 7, name = "YlGn")[6],
gaba.2 = brewer.pal(n = 7, name = "Reds")[4],
gaba.3 = brewer.pal(n = 7, name = "Reds")[6],
rad_glia.6 = colorRampPalette(c("white", "deeppink"))(7)[6],
cyc_rad_glia.1 = colorRampPalette(c("white", "purple"))(7)[6],
cr.10 = colorRampPalette(c("white", "gold"))(7)[6]
)
umap_df <- as.data.frame(reducedDim(sce_gene, "UMAP"))
colnames(umap_df) <- c("UMAP_1", "UMAP_2")
cluster_umap_df <- umap_df %>%
mutate(cluster = sce_gene$cluster) %>%
group_by(cluster) %>%
summarise(
UMAP_1 = mean(UMAP_1),
UMAP_2 = mean(UMAP_2)
)
umap_lineages <- list(
glut_1 = c("glut.4", "glut.8", "glut.5", "glut.7"),
glut_2 = c("glut.4", "glut.11", "glut.9"),
gaba = c("gaba.2", "gaba.3")
)
lineage_umap_df <- umap_lineages %>%
enframe() %>%
unchop(value) %>%
dplyr::rename(lineage = name, cluster = value) %>%
left_join(cluster_umap_df)
set.seed(103)
dittoDimPlot(sce_gene, "cluster",
reduction.use = "UMAP", main = "",
size = 2, do.label = TRUE, labels.size = 4,
labels.highlight = TRUE, legend.show = FALSE) +
scale_colour_manual(values = cluster_colors) +
geom_path(
data = lineage_umap_df,
size = 0.8,
mapping = aes(x = UMAP_1, y = UMAP_2, group = lineage),
arrow = arrow(ends = "last", type = "closed",
length = unit(0.1, "inches"))
)
Cluster cell type identities have been established using the marker gene expression signatures from the Lebrigand et al. 2020 paper:
marker_sets <- list(
Progenitors = c("Neurog2", "Eomes"), # intermediate progenitors
Glut = c("Neurod6", "Neurod2"), # glutamatergic
Mat_Glut = c("Camk2b", "Opcml", "Crym"), # mature glutamatergic
GABA = c("Gad2", "Gad1", "Dlx2"), # GABAergic
Mat_GABA = c("Maf", "Mafb", "Arx"), # mature GABAergic
Rad_glia = c("Fabp7", "Vim", "Dbi"), #radial glia
Cyc_rad_glia = c("Cenpf", "Top2a", "Mki67"), # cycling radial glia
CR_cells = c("Snhg11", "Lhx5", "Reln") # Cajal-Retzius cells
)
sce_gene_marker <- sce_gene
rownames(sce_gene_marker) <- make.unique(
rowData(sce_gene_marker)$gene_name, sep = "@"
)
cell_marker_scores <- sumCountsAcrossFeatures(
sce_gene_marker, marker_sets,
exprs_values = "logcounts", average = TRUE
)
sce_gene_set <- SingleCellExperiment(
assays = list(logcounts = cell_marker_scores),
colData = colData(sce_gene_marker)
)
reducedDim(sce_gene_set, "UMAP") <- reducedDim(sce_gene_marker, "UMAP")
plot_density(
sce_gene_set,
c("Progenitors", "Glut", "Mat_Glut", "GABA", "Mat_GABA",
"Rad_glia", "Cyc_rad_glia", "CR_cells"),
slot = "logcounts", size = 1
)
Isosceles allows for easy detection of isoform switching events by comparing every pair of cell groups (e.g. clusters) and identifying transcripts of the same gene showing statistically significant differences in opposite directions:
isoswitch_df <- find_isoswitch(
se = sce_transcript,
cell_labels = sce_transcript$cluster,
min_fdr = 0.01
)
Print the isoform switching results:
isoswitch_df
Plot the expression density the Clta gene and its transcripts involved in isoform switching:
p1 <- plot_density(
sce_gene, "ENSMUSG00000028478",
size = 1.5
) +
labs(title = "Clta (ENSMUSG00000028478)") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
p2 <- plot_density(
sce_transcript,
"ISOT-6683-8d1f-209c-002d:s44012650:e44032850:AP:FL",
size = 1.5
) +
scale_color_gradientn(colours = brewer.pal(n = 7, name = "Reds")) +
labs(title = "Clta-204 (ENSMUST00000107849)") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
p3 <- plot_density(
sce_transcript,
"ISOT-eeca-a52b-651e-905d:s44012600:e44032850:AP:FL",
size = 1.5
) +
scale_color_gradientn(colours = brewer.pal(n = 7, name = "Blues")) +
labs(title = "Clta-206 (ENSMUST00000170241)") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
p4 <- plot_density(
sce_transcript,
"ISOT-8a93-5996-c87a-8c5f:s44004450:e44032850:AP:FL",
size = 1.5
) +
scale_color_gradientn(colours = brewer.pal(n = 7, name = "Greens")) +
labs(title = "Clta-202 (ENSMUST00000107846)") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
(p1 + p2) / (p3 + p4)
Isosceles can also be used to aggregate PSI event counts at the pseudotime
window level and create a DEXSeqDataSet object suitable for the analysis of
their changes along a pseudotime trajectory (the workflow can be easily modified
to allow for any method of cell aggregation or experiment design). Given the
number of PSI events found in annotations, we recommend filtering them
(e.g. by the number of cells showing signs of the event’s inclusion) - in this
vignette we’ll restrict the analysis to the row names of the (already
pre-filtered) sce_psi
object. We can prepare a DEXSeqDataSet object for the
glutamatergic trajectory 1 using the following commands:
# Aggregate TCC values using moving window over pseudotime
se_window_tcc <- pseudotime_tcc(
se_tcc = se_tcc,
pseudotime = pseudotime_matrix[, "glut_1"],
window_size = 30,
window_step = 15
)
se_window_gene <- tcc_to_gene(
se_tcc = se_window_tcc
)
se_window_transcript <- tcc_to_transcript(
se_tcc = se_window_tcc,
use_length_normalization = FALSE
)
se_window_psi <- transcript_to_psi(
se = se_window_transcript,
)
se_window_psi <- add_psi_counts(
se_psi = se_window_psi,
se_gene = se_window_gene
)
# Prepare the DEXSeqDataSet object
window_pseudotime <- as.numeric(scale(se_window_psi$pseudotime))
dxd <- psi_to_dexseq(
se_psi = se_window_psi,
condition = window_pseudotime,
psi_events = rownames(sce_psi)
)
Print the DEXSeqDataSet object:
dxd
## class: DEXSeqDataSet
## dim: 30 42
## metadata(1): version
## assays(1): counts
## rownames(30): ENSMUSG00000002107:chr2:6664115-6664197:-:CE
## ENSMUSG00000002107:chr2:6560659-6560670:-:A5 ...
## ENSMUSG00000064302:chr1:118536135-118536197:+:CE
## ENSMUSG00000070544:chr2:160715713-160715827:+:CE
## rowData names(4): featureID groupID exonBaseMean exonBaseVar
## colnames: NULL
## colData names(4): sample se_psi_colname condition exon
Standard DEXSeq workflow can be used for further analysis:
dxd <- estimateSizeFactors(dxd)
dxd <- estimateDispersions(dxd)
dxd <- testForDEU(dxd)
dxd <- estimateExonFoldChanges(dxd, fitExpToVar = "condition")
Process the DEXSeq output to make it more readable:
# Process the results of the DEXSeqResults function
dexseq_results_df <- DEXSeqResults(dxd) %>%
as.data.frame() %>%
dplyr::select(featureID, pvalue, padj) %>%
transmute(
psi_event = featureID,
gene_id = sapply(strsplit(featureID, ":"), "[", 1),
gene_name = rowData(sce_gene[gene_id,])$gene_name,
pvalue = pvalue,
fdr = padj
)
rownames(dexseq_results_df) <- NULL
# Calculate the maximum absolute value of logFC across
# different pseudotime points
logFC_values <- as.data.frame(DEXSeqResults(dxd))
logFC_values <- logFC_values[, grepl("^log2fold_", colnames(logFC_values))]
logFC_values <- as.matrix(logFC_values)
max_abs_logFC <- apply(logFC_values, 1, function(x) {max(abs(x))})
max_abs_logFC[is.na(max_abs_logFC)] <- 0
names(max_abs_logFC) <- NULL
dexseq_results_df$max_abs_logFC <- max_abs_logFC
# Filter the DEXSeq results
dexseq_results_df <- filter(
dexseq_results_df, fdr <= 0.05, max_abs_logFC >= 1
)
Print the DEXSeq results:
dexseq_results_df
Plot the expression density the Celf2 gene and the density of PSI values for its three selected PSI events significantly changing in the glutamatergic trajectory 1:
p1 <- plot_density(
sce_gene, "ENSMUSG00000002107",
size = 1.5
) +
labs(title = "Celf2 (ENSMUSG00000002107)") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
p2 <- plot_density(
sce_psi,
"ENSMUSG00000002107:chr2:6560659-6560670:-:A5",
slot = "psi",
size = 1.5
) +
scale_color_gradientn(colours = brewer.pal(n = 7, name = "Reds")) +
labs(title = "chr2:6560659-6560670:-:A5") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
p3 <- plot_density(
sce_psi,
"ENSMUSG00000002107:chr2:6553965-6553982:-:A3",
slot = "psi",
size = 1.5
) +
scale_color_gradientn(colours = brewer.pal(n = 7, name = "Blues")) +
labs(title = "chr2:6553965-6553982:-:A3") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
p4 <- plot_density(
sce_psi,
"ENSMUSG00000002107:chr2:6546780-6547041:-:RI",
slot = "psi",
size = 1.5
) +
scale_color_gradientn(colours = brewer.pal(n = 7, name = "Greens")) +
labs(title = "chr2:6546780-6547041:-:RI") +
theme(legend.position = "right",
plot.title = element_text(size = 12),
legend.title = element_text(size = 11))
(p1 + p2) / (p3 + p4)
In order to visualize the detected PSI events as a heatmap, Isosceles can calculate the PSI count to mean permuted PSI count ratios for pseudotime windows with smaller, trajectory-dependent window and step sizes (warning: the process will take ~30 min):
set.seed(100)
psi_ratio_matrix <- calculate_psi_ratio_matrix(
se_tcc = se_tcc,
pseudotime_matrix = pseudotime_matrix,
psi_events = dexseq_results_df$psi_event,
window_sizes = c(
glut_1 = 100, glut_2 = 100, gaba = 100,
rad_glia = 50, cyc_rad_glia = 50, cr = 50
),
window_steps = c(
glut_1 = 3, glut_2 = 3, gaba = 3,
rad_glia = 3, cyc_rad_glia = 3, cr = 3
),
n_perm = 10
)
Plot the PSI count ratio heatmap (trajectories are denoted using the same colors as in the trajectory UMAP plot in the Data exploration section):
# Prepare the heatmap values
heatmap_values <- log2(psi_ratio_matrix + 0.1)
heatmap_values[is.nan(heatmap_values)] <- 0
# Prepare heatmap annotations
heatmap_trajectory <- sapply(
strsplit(colnames(heatmap_values), "\\."), "[", 1
)
heatmap_gap_positions <- cumsum(rle(heatmap_trajectory)$lengths)
col_ann_df <- data.frame(
trajectory = heatmap_trajectory
)
rownames(col_ann_df) <- colnames(heatmap_values)
col_ann_colors <- list(
trajectory = c(
glut_1 = brewer.pal(n = 7, name = "YlGnBu")[5],
glut_2 = brewer.pal(n = 7, name = "YlGn")[5],
gaba = brewer.pal(n = 7, name = "Reds")[5],
rad_glia = colorRampPalette(c("white", "deeppink"))(7)[6],
cyc_rad_glia = colorRampPalette(c("white", "purple"))(7)[6],
cr = colorRampPalette(c("white", "gold"))(7)[6]
)
)
# Replace PSI events IDs with more readable labels
gene_id_to_name <- setNames(
rowData(sce_gene)$gene_name, rowData(sce_gene)$gene_id
)
psi_labels <- sapply(
strsplit(rownames(heatmap_values), "\\:"), function(x) {
paste0(c(gene_id_to_name[x[1]], x[3], x[5]),
collapse = ":")
}
)
rownames(heatmap_values) <- psi_labels
# Plot the heatmap
pheatmap(
heatmap_values,
breaks = seq(-3, 3, length.out = 101),
cluster_rows = TRUE, cluster_cols = FALSE,
show_rownames = TRUE, show_colnames = FALSE,
annotation_col = col_ann_df,
annotation_colors = col_ann_colors,
gaps_col = heatmap_gap_positions,
legend = TRUE, annotation_legend = FALSE,
annotation_names_col = FALSE,
fontsize_row = 5, treeheight_row = 0,
scale = "row"
)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=C
## [4] LC_COLLATE=C LC_MONETARY=C LC_MESSAGES=C
## [7] LC_PAPER=C LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] Nebulosa_1.0.1 patchwork_1.2.0
## [3] dittoSeq_1.14.2 pheatmap_1.0.12
## [5] DEXSeq_1.48.0 RColorBrewer_1.1-3
## [7] AnnotationDbi_1.64.1 DESeq2_1.42.0
## [9] BiocParallel_1.36.0 scuttle_1.12.0
## [11] ggplot2_3.5.1 tibble_3.2.1
## [13] tidyr_1.3.1 dplyr_1.1.4
## [15] Isosceles_0.2.1 SingleCellExperiment_1.24.0
## [17] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [19] GenomicRanges_1.54.1 GenomeInfoDb_1.38.6
## [21] IRanges_2.36.0 S4Vectors_0.40.2
## [23] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [25] matrixStats_1.2.0 BiocStyle_2.30.0
##
## loaded via a namespace (and not attached):
## [1] ProtGenerics_1.34.0 bitops_1.0-7
## [3] httr_1.4.7 tools_4.3.1
## [5] backports_1.4.1 utf8_1.2.4
## [7] R6_2.5.1 lazyeval_0.2.2
## [9] uwot_0.1.16 withr_3.0.0
## [11] sp_2.1-3 prettyunits_1.2.0
## [13] GGally_2.2.1 gridExtra_2.3
## [15] progressr_0.14.0 cli_3.6.2
## [17] Cairo_1.6-2 labeling_0.4.3
## [19] ggbio_1.50.0 sass_0.4.8
## [21] mvtnorm_1.2-4 genefilter_1.84.0
## [23] ggridges_0.5.6 Rsamtools_2.18.0
## [25] foreign_0.8-84 dichromat_2.0-0.1
## [27] scater_1.30.1 parallelly_1.37.0
## [29] BSgenome_1.70.2 limma_3.58.1
## [31] rstudioapi_0.15.0 RSQLite_2.3.5
## [33] FNN_1.1.4 generics_0.1.3
## [35] BiocIO_1.12.0 hwriter_1.3.2.1
## [37] Matrix_1.6-5 ggbeeswarm_0.7.2
## [39] fansi_1.0.6 abind_1.4-5
## [41] lifecycle_1.0.4 yaml_2.3.8
## [43] edgeR_4.0.15 SparseArray_1.2.4
## [45] BiocFileCache_2.10.1 grid_4.3.1
## [47] blob_1.2.4 dqrng_0.3.2
## [49] crayon_1.5.2 lattice_0.22-5
## [51] beachmat_2.18.1 cowplot_1.1.3
## [53] GenomicFeatures_1.54.3 annotate_1.80.0
## [55] KEGGREST_1.42.0 magick_2.8.2
## [57] pillar_1.9.0 knitr_1.45
## [59] metapod_1.10.1 rjson_0.2.21
## [61] future.apply_1.11.1 codetools_0.2-19
## [63] fastmatch_1.1-4 glue_1.7.0
## [65] data.table_1.15.0 vctrs_0.6.5
## [67] png_0.1-8 spam_2.10-0
## [69] gtable_0.3.4 assertthat_0.2.1
## [71] cachem_1.0.8 ks_1.14.2
## [73] xfun_0.42 S4Arrays_1.2.0
## [75] pracma_2.4.4 survival_3.5-8
## [77] statmod_1.5.0 bluster_1.12.0
## [79] bit64_4.0.5 progress_1.2.3
## [81] filelock_1.0.3 bslib_0.6.1
## [83] irlba_2.3.5.1 vipor_0.4.7
## [85] KernSmooth_2.23-21 rpart_4.1.23
## [87] colorspace_2.1-0 DBI_1.2.2
## [89] Hmisc_5.1-1 nnet_7.3-19
## [91] ggrastr_1.0.2 tidyselect_1.2.0
## [93] bit_4.0.5 compiler_4.3.1
## [95] curl_5.2.0 graph_1.80.0
## [97] htmlTable_2.4.2 BiocNeighbors_1.20.2
## [99] xml2_1.3.6 DelayedArray_0.28.0
## [101] bookdown_0.37 rtracklayer_1.62.0
## [103] checkmate_2.3.1 scales_1.3.0
## [105] RBGL_1.78.0 rappdirs_0.3.3
## [107] stringr_1.5.1 digest_0.6.34
## [109] rmarkdown_2.25 XVector_0.42.0
## [111] htmltools_0.5.7 pkgconfig_2.0.3
## [113] base64enc_0.1-3 sparseMatrixStats_1.14.0
## [115] highr_0.10 dbplyr_2.4.0
## [117] fastmap_1.1.1 ensembldb_2.26.0
## [119] rlang_1.1.3 htmlwidgets_1.6.4
## [121] DelayedMatrixStats_1.24.0 farver_2.1.1
## [123] jquerylib_0.1.4 jsonlite_1.8.8
## [125] mclust_6.0.1 BiocSingular_1.18.0
## [127] VariantAnnotation_1.48.1 RCurl_1.98-1.14
## [129] magrittr_2.0.3 Formula_1.2-5
## [131] GenomeInfoDbData_1.2.11 dotCall64_1.1-1
## [133] munsell_0.5.0 Rcpp_1.0.12
## [135] viridis_0.6.5 stringi_1.8.3
## [137] zlibbioc_1.48.0 plyr_1.8.9
## [139] ggstats_0.5.1 parallel_4.3.1
## [141] listenv_0.9.1 ggrepel_0.9.5
## [143] Biostrings_2.70.2 splines_4.3.1
## [145] hms_1.1.3 locfit_1.5-9.8
## [147] igraph_1.6.0 reshape2_1.4.4
## [149] geneplotter_1.80.0 biomaRt_2.58.2
## [151] ScaledMatrix_1.10.0 XML_3.99-0.16.1
## [153] evaluate_0.23 SeuratObject_5.0.1
## [155] biovizBase_1.50.0 scran_1.30.2
## [157] BiocManager_1.30.22 purrr_1.0.2
## [159] future_1.33.1 rsvd_1.0.5
## [161] xtable_1.8-4 restfulr_0.0.15
## [163] AnnotationFilter_1.26.0 viridisLite_0.4.2
## [165] OrganismDbi_1.44.0 memoise_2.0.1
## [167] beeswarm_0.4.0 GenomicAlignments_1.38.2
## [169] cluster_2.1.4 globals_0.16.2