exp_id <- "20231009_multiome_analysis/03-integration" # determines the name of the cache folder
doc_id <- "05c" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out <- here::here("outs", exp_id, doc_id, "/"); dir.create(out, recursive = TRUE)
figout <- here::here("outs", exp_id, doc_id, "figures/")
cache <- file.path("~/scratch/cache", exp_id, doc_id, "/")
Here, we’ll assemble the final annotations for each dataset. Consulting several resources:
Hodge, R.D., Bakken, T.E., Miller, J.A., Smith, K.A., Barkan, E.R., Graybuck, L.T., Close, J.L., Long, B., Johansen, N., Penn, O., et al. (2019). Conserved cell types with divergent features in human versus mouse cortex. Nature 573, 61–68. 10.1038/s41586-019-1506-7.
Bakken, T.E., Jorstad, N.L., Hu, Q., Lake, B.B., Tian, W., Kalmbach, B.E., Crow, M., Hodge, R.D., Krienen, F.M., Sorensen, S.A., et al. (2021). Comparative cellular analysis of motor cortex in human, marmoset and mouse. Nature 598, 111–119. 10.1038/s41586-021-03465-8.
Franjic, D., Skarica, M., Ma, S., Arellano, J.I., Tebbenkamp, A.T.N., Choi, J., Xu, C., Li, Q., Morozov, Y.M., Andrijevic, D., et al. (2022). Transcriptomic taxonomy and neurogenic trajectories of adult human, macaque, and pig hippocampal and entorhinal cells. Neuron 110, 452–469.e14. 10.1016/j.neuron.2021.10.036.
In general, to finalize cell type annotation, we will subcluster glutamatergic and GABAergic neurons separately.
library(here)
library(tidyr)
library(dplyr)
library(purrr)
library(readr)
library(glue)
library(Seurat)
library(ggplot2)
library(Signac)
source(here("functions_r/scRNAseq.R"))
ggplot2::theme_set(theme_min())
merged_dir <- "/oak/stanford/groups/akundaje/projects/psychencode/outs/20231009_multiome_analysis/03-integration/merged_objects"
hippocampus <- readRDS(file.path(merged_dir,
"Hippocampus.integrated.batch.analyzed.rds"))
We manually assigned colors for each broad label, taken from the following studies to approximately match the color scheme used in the Allen Institute papers.
Conventions:
predicted.cluster_broad
is the per-cell automated
annotation from Azimuth, for the hippocampus referencepredicted.cell_type_percluster
is the majority vote of
the per-cell label, within each cluster# load labels
labels <- readr::read_tsv(
here("exps/20231009_multiome_analysis_v2/03-integration/label_map.tsv")) %>%
tibble::rowid_to_column(var = "Order")
## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Class, Label, Color, Label_harmonized, Label_chrombpnet
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
palette_labels <- labels %>% dplyr::select(Label, Color) %>% tibble::deframe()
palette_labels_harmonized <- labels %>% distinct(Label_harmonized, Color) %>%
tibble::deframe()
labels
Idents(hippocampus) <- "seurat_clusters"
p0 <- plot_dr(hippocampus, color_by = "wsnn_batch_res.0.6", point_size = 0.2,
label = TRUE, reduction = "harm.wnn.umap_batch",
label_size = 5, legend = FALSE,
title = glue("Hippocampus, n = {ncol(hippocampus)}")) + xlab("UMAP 1") +
ylab("UMAP 2")
Idents(hippocampus) <- "predicted.cluster_broad"
p1 <- plot_dr(hippocampus, color_by = "predicted.subclass__humancortexref",
point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels,
label_size = 5, legend = FALSE,
title = glue("Hippocampus, n = {ncol(hippocampus)}, cortex ref")) +
xlab("UMAP 1") + ylab("UMAP 2")
p2 <- plot_dr(hippocampus, color_by = "predicted.cluster_broad",
point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels, label_size = 5, legend = FALSE,
title = glue("Hippocampus, n = {ncol(hippocampus)}, hippocampus ref"))
p0 + p1 + p2
Tabulate predictions with different references:
# tabulate predictions
df_compare_refs <- hippocampus@meta.data %>%
dplyr::select(Human_ctx = predicted.subclass__humancortexref,
Human_hippocampus = predicted.cluster_broad) %>%
group_by(Human_ctx) %>%
mutate(N_per_ctx_label = n()) %>%
ungroup() %>%
group_by(Human_ctx, Human_hippocampus, N_per_ctx_label) %>%
summarize(N_per_hipp_label = n()) %>%
mutate(Prop_per_ctx_label = N_per_hipp_label / N_per_ctx_label) %>%
arrange(desc(Prop_per_ctx_label)) %>%
dplyr::select(-N_per_ctx_label, -N_per_hipp_label)
## `summarise()` has grouped output by 'Human_ctx', 'Human_hippocampus'. You can
## override using the `.groups` argument.
df_compare_refs
# make into a matrix
df_compare_refs <- df_compare_refs %>%
dplyr::select(Human_ctx, Human_hippocampus, Prop_per_ctx_label) %>%
pivot_wider(names_from = Human_hippocampus, values_from = Prop_per_ctx_label,
values_fill = 0) %>% as.data.frame()
rownames(df_compare_refs) <- df_compare_refs$Human_ctx
df_compare_refs <- df_compare_refs[, -1]
df_compare_refs
Plot comparison between references:
# plot -- where rows indicate the cortex ref and cols indicate the hippocampus ref
pheatmap::pheatmap(df_compare_refs,
cluster_rows = FALSE,
cluster_cols = FALSE,
scale = "none",
cellwidth = 30,
cellheight = 30,
fontsize_row = 15,
fontsize_col = 15,
display_numbers = TRUE,
number_color = "white",
main = "Proportion of cells per cortex reference class (rows) assigned to each hippocampus reference class (cols)",
)
plot_dr(hippocampus, color_by = "predicted.cell_type_percluster",
point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels, label_size = 5, legend = FALSE,
title = glue("Hippocampus, n = {ncol(hippocampus)} (majority per cluster, using hippocampus ref)")) +
xlab("UMAP 1") + ylab("UMAP 2")
p1 <- hippocampus@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.subclass__humancortexref), position = "fill") +
scale_fill_manual(values = palette_labels) +
ggtitle("Cluster breakdown by cortex ref predictions")
p2 <- hippocampus@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.cluster_broad), position = "fill") +
scale_fill_manual(values = palette_labels) +
ggtitle("Cluster breakdown by hippocampus ref predictions")
p3 <- hippocampus@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.cell_type_percluster), position = "fill") +
scale_fill_manual(values = palette_labels) +
ggtitle("Cluster breakdown by hippocampus ref predictions, majority per cluster")
cowplot::plot_grid(p1, p2, p3, ncol = 1, axis = "rl", align = "v")
Add in broader labels:
# add in broader cell class from labels dataframe column
hippocampus@meta.data$predicted.cell_class <- hippocampus@meta.data$predicted.cluster_broad
hippocampus@meta.data$predicted.cell_class <- plyr::mapvalues(
hippocampus@meta.data$predicted.cell_class, from = labels$Label,
to = labels$Class)
## The following `from` values were not present in `x`: Micro-PVM, VLMC_VSMC, Vip, Sst, Sst Chodl, Pvalb, Lamp5, Sncg, ExN, L2-5 IT, L2-6 IT, L2/3 IT, L5 IT, L5 ET, L5/6 NP, L6b, L6 CT and L6b, L6 IT, L6 IT Car3, L6 CT, DG, CA1-3_and_subiculum, SUB, Unknown
# get the most common cell class for each cluster
df_cluster_celltype <- hippocampus@meta.data %>%
dplyr::select(seurat_clusters, predicted.cell_class) %>%
group_by(seurat_clusters, predicted.cell_class) %>%
summarise(n = n()) %>%
top_n(1, n)
## `summarise()` has grouped output by 'seurat_clusters'. You can override using
## the `.groups` argument.
# add it back to the object
hippocampus@meta.data$predicted.cell_class_percluster <- plyr::mapvalues(
hippocampus@meta.data$seurat_clusters, from = df_cluster_celltype$seurat_clusters,
to = df_cluster_celltype$predicted.cell_class)
## The following `from` values were not present in `x`: 31
plot_dr(hippocampus, color_by = "predicted.cluster_broad", point_size = 0.2,
label = FALSE, reduction = "harm.wnn.umap_batch", colors = palette_labels,
cells = WhichCells(hippocampus,
expr = predicted.cell_class == "Glutamatergic"),
label_size = 5, legend = FALSE,
title = "Hippocampus, highlighting glutamatergic cells")
hippocampus_glutamatergic <- subset(hippocampus, cells = WhichCells(
hippocampus, expr = predicted.cell_class_percluster == "Glutamatergic"))
table(hippocampus_glutamatergic$predicted.cell_class_percluster)
##
## Non-neuronal Glutamatergic GABAergic
## 0 27666 0
# call subcluster_seurat() function in functions_r/scRNAseq.R
hippocampus_glutamatergic <- subcluster_seurat(hippocampus_glutamatergic,
key = "Glutamatergic",
resolution = 1)
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
cowplot::plot_grid(umap(hippocampus_glutamatergic,
color_by = "predicted.cluster_broad",
colors = palette_labels),
umap(hippocampus_glutamatergic,
color_by = "Clusters_subset_Glutamatergic"))
# added by James: bar plot for cell type proportion per cluster
hippocampus_glutamatergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.cluster_broad), position = "fill") +
scale_fill_manual(values = palette_labels) +
ylab('proportion')+
ggtitle("hippocampus cluster breakdown for glutamatergic")
table(hippocampus_glutamatergic@meta.data$seurat_clusters)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 3756 1811 1799 1710 1462 1455 1346 1314 1200 1129 1009 888 862 708 643 550
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
## 545 526 505 458 394 392 371 365 358 350 328 324 280 260 165 161
## 33 34 35
## 130 62 50
# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)
# get the most common cell class for each cluster
df_glutamatergic <- hippocampus_glutamatergic@meta.data %>%
dplyr::select(Clusters_subset_Glutamatergic, predicted.cluster_broad) %>%
group_by(Clusters_subset_Glutamatergic, predicted.cluster_broad) %>%
dplyr::summarise(n_per = dplyr::n()) %>%
top_n(1, n_per) %>%
# add suffixes to clusters with the same majority vote, only if there's more than one cluster
group_by(predicted.cluster_broad) %>%
dplyr::mutate(n_per_label = dplyr::n()) %>%
dplyr::mutate(new_label = case_when(
n_per_label > 1 ~ paste0(predicted.cluster_broad, "_", row_number()),
TRUE ~ predicted.cluster_broad
))
## `summarise()` has grouped output by 'Clusters_subset_Glutamatergic'. You can
## override using the `.groups` argument.
df_glutamatergic
hippocampus_glutamatergic@meta.data$cell_type_final <- plyr::mapvalues(
hippocampus_glutamatergic@meta.data$Clusters_subset_Glutamatergic,
from = df_glutamatergic$Clusters_subset_Glutamatergic,
to = df_glutamatergic$new_label)
Idents(hippocampus_glutamatergic) <- "cell_type_final"
plot_dr(hippocampus_glutamatergic, color_by = "predicted.cluster_broad",
point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels, label_size = 5, legend = TRUE,
title = glue("Hippocampus glutamatergic neurons, final cell type labels, \nn = {ncol(hippocampus_glutamatergic)}"))
saveRDS(hippocampus_glutamatergic, file = file.path(
merged_dir, "Hippocampus.subset_glutamatergic.seurat.Rds"))
# call markers on subsetted object
markers_glutamatergic <- FindAllMarkers(hippocampus_glutamatergic,
only.pos = TRUE, min.pct = 0.25,
logfc.threshold = 0.25)
## Calculating cluster CA3_1
## Calculating cluster CA3_2
## Calculating cluster DG GC_1
## Calculating cluster DG GC_2
## Calculating cluster DG GC_3
## Calculating cluster DG GC_4
## Calculating cluster CA3_3
## Calculating cluster DG GC_5
## Calculating cluster DG GC_6
## Calculating cluster CA1 dorsal_1
## Calculating cluster DG GC_7
## Calculating cluster DG GC_8
## Calculating cluster CA1 dorsal_2
## Calculating cluster Oligo
## Calculating cluster CA3_4
## Calculating cluster DG GC_9
## Calculating cluster DG GC_10
## Calculating cluster DG GC_11
## Calculating cluster DG GC_12
## Calculating cluster DG GC_13
## Calculating cluster DG GC_14
## Calculating cluster DG GC_15
## Calculating cluster DG GC_16
## Calculating cluster EC L2_1
## Calculating cluster DG GC_17
## Calculating cluster EC L2_2
## Calculating cluster EC L2_3
## Calculating cluster SUB proximal
## Calculating cluster CA1 dorsal_3
## Calculating cluster CA1 ventral
## Calculating cluster DG MC
## Calculating cluster SUB distal
## Calculating cluster DG GC_18
## Calculating cluster EC L6
## Calculating cluster EC L3
write_tsv(markers_glutamatergic, file.path(
merged_dir, "Hippocampus.subset_glutamatergic.markers.tsv"))
top5 <- markers_glutamatergic %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = hippocampus_glutamatergic, downsample = 100),
features = top5$gene) + viridis::scale_fill_viridis()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
hippocampus_glutamatergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = condition), position = "fill")
hippocampus_glutamatergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = sample), position = "fill")
plot_dr(hippocampus, color_by = "predicted.cluster_broad", point_size = 0.2,
label = FALSE, reduction = "harm.wnn.umap_batch", colors = palette_labels,
cells = WhichCells(hippocampus, expr = predicted.cell_class == "GABAergic"),
label_size = 5, legend = FALSE,
title = "Hippocampus, highlighting GABAergic cells")
hippocampus_gabaergic <- subset(hippocampus, cells = WhichCells(
hippocampus, expr = predicted.cell_class_percluster == "GABAergic"))
table(hippocampus_gabaergic$predicted.cell_class_percluster)
##
## Non-neuronal Glutamatergic GABAergic
## 0 0 3244
hippocampus_gabaergic <- subcluster_seurat(hippocampus_gabaergic, key = "GABAergic", resolution = 1)
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
cowplot::plot_grid(umap(hippocampus_gabaergic,
color_by = "predicted.cluster_broad",
colors = palette_labels),
umap(hippocampus_gabaergic,
color_by = "Clusters_subset_GABAergic"))
# added by James: bar plot for cell type percentage per cluster
hippocampus_gabaergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.cluster_broad), position = "fill") +
scale_fill_manual(values = palette_labels) +
ylab('proportion')+
ggtitle("hippocampus cluster breakdown for gabaergic")
table(hippocampus_gabaergic@meta.data$seurat_clusters)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 402 309 277 265 245 228 196 192 183 176 172 162 120 100 80 80 57
# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)
# get the most common cell class for each cluster
df_gabaergic <- hippocampus_gabaergic@meta.data %>%
dplyr::select(Clusters_subset_GABAergic, predicted.cluster_broad) %>%
group_by(Clusters_subset_GABAergic, predicted.cluster_broad) %>%
dplyr::summarise(n_per = dplyr::n()) %>%
top_n(1, n_per) %>%
# add suffixes to clusters with the same majority vote, only if there's more than one cluster
group_by(predicted.cluster_broad) %>%
dplyr::mutate(n_per_label = dplyr::n()) %>%
dplyr::mutate(new_label = case_when(
n_per_label > 1 ~ paste0(predicted.cluster_broad, "_", row_number()),
TRUE ~ predicted.cluster_broad
))
## `summarise()` has grouped output by 'Clusters_subset_GABAergic'. You can
## override using the `.groups` argument.
df_gabaergic
hippocampus_gabaergic@meta.data$cell_type_final <- plyr::mapvalues(
hippocampus_gabaergic@meta.data$Clusters_subset_GABAergic,
from = df_gabaergic$Clusters_subset_GABAergic,
to = df_gabaergic$new_label)
Idents(hippocampus_gabaergic) <- "cell_type_final"
saveRDS(hippocampus_gabaergic, file = file.path(
merged_dir, "Hippocampus.subset_gabaergic.seurat.Rds"))
plot_dr(hippocampus_gabaergic, color_by = "predicted.cluster_broad",
point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels,
label_size = 5, legend = TRUE,
title = glue("Hippocampus GABAergic neurons, final cell type labels, \nn = {ncol(hippocampus_gabaergic)}"))
# call markers on subsetted object
markers_gabaergic <- FindAllMarkers(hippocampus_gabaergic, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster InN LAMP5_1
## Calculating cluster InN PVALB_1
## Calculating cluster InN LAMP5_2
## Calculating cluster InN SST_1
## Calculating cluster InN VIP_1
## Calculating cluster InN VIP_2
## Calculating cluster InN SST_2
## Calculating cluster InN NR2F2_1
## Calculating cluster InN VIP_3
## Calculating cluster InN LAMP5_3
## Calculating cluster InN VIP_4
## Calculating cluster InN VIP_5
## Calculating cluster InN SST_3
## Calculating cluster InN NR2F2_2
## Calculating cluster InN NR2F2_3
## Calculating cluster InN PVALB_2
## Calculating cluster InN VIP_6
write_tsv(markers_gabaergic,
file.path(merged_dir, "Hippocampus.subset_gabaergic.markers.tsv"))
top5 <- markers_gabaergic %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = hippocampus_gabaergic, downsample = 100),
features = top5$gene) + viridis::scale_fill_viridis()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
hippocampus_gabaergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = condition), position = "fill")
hippocampus_gabaergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = sample), position = "fill")
Specify certain subclusters to label as “unknown” due to lack of clear markers or mixed annotations.
# define clusters to drop, then convert the column back to character; and replace those labels
Idents(hippocampus_glutamatergic) <- "seurat_clusters"
clusters_unk_glut <- c('14', '20', '22', '24', '26', '27', '28')
cells_unk_glut <- WhichCells(hippocampus_glutamatergic, idents = clusters_unk_glut)
hippocampus_glutamatergic$cell_type_final <- as.character(
hippocampus_glutamatergic$cell_type_final)
hippocampus_glutamatergic$cell_type_final[cells_unk_glut] <- "Unknown"
Idents(hippocampus_gabaergic) <- "seurat_clusters"
clusters_unk_gaba <- c("4", "11", "17")
cells_unk_gaba <- WhichCells(hippocampus_gabaergic, idents = clusters_unk_gaba)
hippocampus_gabaergic$cell_type_final <- as.character(hippocampus_gabaergic$cell_type_final)
hippocampus_gabaergic$cell_type_final[cells_unk_gaba] <- "Unknown"
# as a default, set the final cell type to the majority cell type per cluster
hippocampus$cell_type_final <- as.character(
hippocampus$predicted.cell_type_percluster)
hippocampus@meta.data$Clusters_all <- hippocampus@meta.data$seurat_clusters
hippocampus@meta.data$Clusters_subset_glutamatergic <- NA
hippocampus@meta.data$Clusters_subset_GABAergic <- NA
# add glutamatergic subclusters into main object
cells_glut <- Cells(hippocampus_glutamatergic)
hippocampus@meta.data[cells_glut, ]$Clusters_subset_glutamatergic <- hippocampus_glutamatergic@meta.data$Clusters_subset_Glutamatergic
hippocampus@meta.data[cells_glut, ]$cell_type_final <- as.character(hippocampus_glutamatergic@meta.data$cell_type_final)
# add gabaergic subclusters into main object
cells_gab <- Cells(hippocampus_gabaergic)
hippocampus@meta.data[cells_gab, ]$Clusters_subset_GABAergic <- hippocampus_gabaergic@meta.data$Clusters_subset_GABAergic
hippocampus@meta.data[cells_gab, ]$cell_type_final <- as.character(hippocampus_gabaergic@meta.data$cell_type_final)
# relabel with harmonized cell type labels
label_map_new <- data.frame(Label_granular = unique(hippocampus@meta.data$cell_type_final)) %>%
mutate(Label_granular_old = Label_granular) %>%
separate(Label_granular, into = c("Label_wo_suffix", "suffix"), sep = "_") %>%
left_join(labels, by = c("Label_wo_suffix" = "Label")) %>%
mutate(Label_granular = case_when(
!is.na(suffix) ~ paste0(Label_harmonized, "_", suffix),
TRUE ~ Label_harmonized
)) %>%
arrange(Order)
hippocampus@meta.data$cell_type_granular <- plyr::mapvalues(
hippocampus@meta.data$cell_type_final,
from = label_map_new$Label_granular_old, to = label_map_new$Label_granular)
hippocampus@meta.data$cell_type_broad <- plyr::mapvalues(
hippocampus@meta.data$cell_type_final, from = label_map_new$Label_granular_old,
to = label_map_new$Label_harmonized)
hippocampus@meta.data$cell_type_final <- NULL
# put the labels in the cell type order from our label map, to get a rational order
hippocampus_order <- label_map_new %>%
dplyr::filter(Label_granular %in% hippocampus$cell_type_granular) %>%
pull(Label_granular) %>% base::unique()
hippocampus_order
## [1] "OPC" "Oligo"
## [3] "Astro" "Microglia PVM"
## [5] "Endo" "Vascular leptomeningeal"
## [7] "VIP_3" "VIP_5"
## [9] "VIP_1" "VIP_2"
## [11] "NR2F2_1" "NR2F2_3"
## [13] "NR2F2_2" "SST_3"
## [15] "SST_2" "PVALB_1"
## [17] "PVALB_2" "LAMP5_1"
## [19] "LAMP5_3" "LAMP5_2"
## [21] "L3" "L6"
## [23] "DG granule_7" "DG granule_5"
## [25] "DG granule_10" "DG granule_9"
## [27] "DG granule_6" "DG granule_11"
## [29] "DG granule_1" "DG granule_4"
## [31] "DG granule_12" "DG granule_14"
## [33] "DG granule_16" "DG granule_8"
## [35] "DG granule_17" "DG granule_3"
## [37] "DG granule_2" "DG granule_18"
## [39] "DG mossy" "CA3_4"
## [41] "CA3_2" "CA3_1"
## [43] "CA3_3" "CA1 dorsal_1"
## [45] "CA1 dorsal_2" "CA1 dorsal_3"
## [47] "CA1 ventral" "Subiculum distal"
## [49] "Unknown"
hippocampus_order_broad <- labels %>%
dplyr::filter(Label_harmonized %in% unique(hippocampus$cell_type_broad)) %>%
pull(Label_harmonized) %>%
base::unique()
hippocampus_order_broad
## [1] "OPC" "Oligo"
## [3] "Astro" "Microglia PVM"
## [5] "Endo" "Vascular leptomeningeal"
## [7] "VIP" "NR2F2"
## [9] "SST" "PVALB"
## [11] "LAMP5" "L3"
## [13] "L6" "DG granule"
## [15] "DG mossy" "CA3"
## [17] "CA1 dorsal" "CA1 ventral"
## [19] "Subiculum distal" "Unknown"
# put the labels in the cell type order from our label map, to get a rational order
hippocampus$cell_type_broad <- factor(hippocampus$cell_type_broad,
levels = hippocampus_order_broad)
hippocampus$cell_type_granular <- factor(hippocampus$cell_type_granular,
levels = hippocampus_order)
Idents(hippocampus) <- "cell_type_granular"
plot_dr(hippocampus, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
label_size = 5, legend = TRUE,
title = glue("DLPFC AD, final cell type labels, n = {ncol(hippocampus)}"))
Idents(hippocampus) <- "cell_type_broad"
plot_dr(hippocampus, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
label_size = 5, legend = TRUE,
title = glue("hippocampus, final cell type labels, broad, n = {ncol(hippocampus)}"))
Plot cluster numbers:
hippocampus@meta.data %>%
group_by(cell_type_broad, cell_type_granular) %>%
count() %>%
ggplot(aes(x = cell_type_granular, y = n)) +
geom_bar(aes(fill = cell_type_broad), stat = "identity") +
geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
scale_fill_manual(values = palette_labels_harmonized) +
scale_color_manual(values = palette_labels_harmonized) +
rotate_x() +
ggtitle("Number of cells per granular cluster")
hippocampus@meta.data %>%
group_by(cell_type_broad) %>%
count() %>%
ggplot(aes(x = cell_type_broad, y = log10(n))) +
geom_bar(aes(fill = cell_type_broad), stat = "identity") +
geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
scale_fill_manual(values = palette_labels_harmonized) +
scale_color_manual(values = palette_labels_harmonized) +
rotate_x() +
ggtitle("Number of cells per broad cluster")
hippocampus@meta.data %>%
group_by(cell_type_broad) %>%
summarize(sum_ncount = sum(nCount_ATAC)) %>%
ggplot(aes(x = cell_type_broad, y = log10(sum_ncount))) +
geom_bar(aes(fill = cell_type_broad), stat = "identity") +
geom_text(aes(label = sum_ncount, color = cell_type_broad), hjust = -0.1,
angle = 90) +
scale_fill_manual(values = palette_labels_harmonized) +
scale_color_manual(values = palette_labels_harmonized) +
rotate_x() +
ggtitle("Number of fragments in peaks, per broad cluster") +
coord_cartesian(ylim = c(0, 10))
DotPlot(hippocampus, assay = "RNA", group.by = "cell_type_granular", features = rev(c(
"PDGFRA",
"PLP1",
"AQP4",
"LY86",
"SKAP1",
"VWF",
"GAD1",
"VIP",
"NR2F2",
"SST",
"PVALB",
"LAMP5",
"SLC17A6",
"PROX1", # DG
"FGF5",
"POU3F1"
))) +
ylab("Cell type") +
coord_flip() +
rotate_x()
DotPlot(hippocampus, assay = "RNA", group.by = "cell_type_broad", features = rev(c(
"PDGFRA",
"PLP1",
"AQP4",
"LY86",
"SKAP1",
"VWF",
"GAD1",
"VIP",
"NR2F2",
"SST",
"PVALB",
"LAMP5",
"SLC17A6",
"PROX1", # DG
"FGF5",
"POU3F1"
))) +
ylab("Cell type") +
coord_flip() +
rotate_x()
hippocampus_meta <- hippocampus@meta.data
saveRDS(hippocampus_meta, file = file.path(
merged_dir, "Hippocampus.integrated.batch.analyzed.cell_metadata.labeled.rds"))
Idents(hippocampus) <- "cell_type_broad"
markers_hippocampus <- FindAllMarkers(hippocampus, only.pos = TRUE,
logfc.threshold = 0.25,
max.cells.per.ident = 1000) %>%
mutate(pct.diff = pct.1 - pct.2)
## Calculating cluster OPC
## Calculating cluster Oligo
## Calculating cluster Astro
## Calculating cluster Microglia PVM
## Calculating cluster Endo
## Calculating cluster Vascular leptomeningeal
## Calculating cluster VIP
## Calculating cluster NR2F2
## Calculating cluster SST
## Calculating cluster PVALB
## Calculating cluster LAMP5
## Calculating cluster L2
## Calculating cluster L3
## Calculating cluster L6
## Calculating cluster DG granule
## Calculating cluster DG mossy
## Calculating cluster CA3
## Calculating cluster CA1 dorsal
## Calculating cluster CA1 ventral
## Calculating cluster Subiculum distal
## Calculating cluster Subiculum proximal
## Calculating cluster Unknown
write_tsv(markers_hippocampus, file.path(merged_dir, "Hippocampus.markers.cell_type_broad.tsv"))
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/libf77blas.so.3.10.3
## LAPACK: /users/guozy/miniconda3/envs/psych/lib/libopenblasp-r0.3.27.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 viridis_0.6.5 viridisLite_0.4.2 Signac_1.13.0
## [5] ggplot2_3.5.1 SeuratObject_5.0.0 Seurat_4.3.0 glue_1.7.0
## [9] readr_2.1.5 purrr_1.0.2 dplyr_1.1.4 tidyr_1.3.0
## [13] here_1.0.1 Cairo_1.6-2
##
## loaded via a namespace (and not attached):
## [1] spatstat.univar_3.0-0 spam_2.10-0 fastmatch_1.1-4
## [4] plyr_1.8.9 igraph_2.0.3 lazyeval_0.2.2
## [7] sp_2.1-4 splines_4.2.0 BiocParallel_1.32.6
## [10] listenv_0.9.1 scattermore_1.2 GenomeInfoDb_1.34.9
## [13] digest_0.6.36 htmltools_0.5.8.1 fansi_1.0.6
## [16] magrittr_2.0.3 tensor_1.5 cluster_2.1.6
## [19] ROCR_1.0-11 limma_3.54.2 tzdb_0.4.0
## [22] Biostrings_2.66.0 globals_0.16.3 matrixStats_1.3.0
## [25] vroom_1.6.5 spatstat.sparse_3.1-0 colorspace_2.1-0
## [28] rappdirs_0.3.3 ggrepel_0.9.5 xfun_0.46
## [31] crayon_1.5.3 RCurl_1.98-1.16 jsonlite_1.8.8
## [34] progressr_0.14.0 spatstat.data_3.1-2 survival_3.7-0
## [37] zoo_1.8-12 polyclip_1.10-6 gtable_0.3.5
## [40] zlibbioc_1.44.0 XVector_0.38.0 leiden_0.4.3.1
## [43] future.apply_1.11.2 BiocGenerics_0.44.0 abind_1.4-5
## [46] scales_1.3.0 pheatmap_1.0.12 spatstat.random_3.3-1
## [49] miniUI_0.1.1.1 Rcpp_1.0.11 xtable_1.8-4
## [52] reticulate_1.38.0 bit_4.0.5 dotCall64_1.1-1
## [55] stats4_4.2.0 htmlwidgets_1.6.4 httr_1.4.7
## [58] ica_1.0-3 farver_2.1.2 pkgconfig_2.0.3
## [61] sass_0.4.9 uwot_0.2.2 deldir_2.0-4
## [64] utf8_1.2.4 labeling_0.4.3 tidyselect_1.2.1
## [67] rlang_1.1.4 reshape2_1.4.4 later_1.3.2
## [70] munsell_0.5.1 tools_4.2.0 cachem_1.1.0
## [73] cli_3.6.3 generics_0.1.3 ggridges_0.5.6
## [76] evaluate_0.24.0 stringr_1.5.1 fastmap_1.2.0
## [79] yaml_2.3.9 goftest_1.2-3 bit64_4.0.5
## [82] knitr_1.48 fitdistrplus_1.2-1 RANN_2.6.1
## [85] pbapply_1.7-2 future_1.33.2 nlme_3.1-165
## [88] mime_0.12 RcppRoll_0.3.1 compiler_4.2.0
## [91] plotly_4.10.4 png_0.1-8 spatstat.utils_3.0-5
## [94] tibble_3.2.1 bslib_0.7.0 stringi_1.8.4
## [97] highr_0.11 lattice_0.22-6 Matrix_1.6-1.1
## [100] vctrs_0.6.5 pillar_1.9.0 lifecycle_1.0.4
## [103] spatstat.geom_3.3-2 lmtest_0.9-40 jquerylib_0.1.4
## [106] RcppAnnoy_0.0.22 data.table_1.15.4 cowplot_1.1.3
## [109] bitops_1.0-7 irlba_2.3.5.1 httpuv_1.6.15
## [112] patchwork_1.2.0 GenomicRanges_1.50.2 R6_2.5.1
## [115] promises_1.3.0 KernSmooth_2.23-24 gridExtra_2.3
## [118] IRanges_2.32.0 parallelly_1.37.1 codetools_0.2-20
## [121] MASS_7.3-56 rprojroot_2.0.4 withr_3.0.0
## [124] sctransform_0.4.0 Rsamtools_2.14.0 S4Vectors_0.36.2
## [127] GenomeInfoDbData_1.2.9 parallel_4.2.0 hms_1.1.3
## [130] grid_4.2.0 rmarkdown_2.27 Rtsne_0.17
## [133] spatstat.explore_3.3-1 shiny_1.8.1.1