exp_id <- "20231009_multiome_analysis/03-integration" # determines the name of the cache folder
doc_id <- "05b" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out <- here::here("outs", exp_id, doc_id, "/"); dir.create(out, recursive = TRUE)
figout <- here::here("outs", exp_id, doc_id, "figures/")
cache <- file.path("~/scratch/cache", exp_id, doc_id, "/")
Here, we’ll assemble the final annotations for each dataset. Consulting several resources:
Hodge, R.D., Bakken, T.E., Miller, J.A., Smith, K.A., Barkan, E.R., Graybuck, L.T., Close, J.L., Long, B., Johansen, N., Penn, O., et al. (2019). Conserved cell types with divergent features in human versus mouse cortex. Nature 573, 61–68. 10.1038/s41586-019-1506-7.
Bakken, T.E., Jorstad, N.L., Hu, Q., Lake, B.B., Tian, W., Kalmbach, B.E., Crow, M., Hodge, R.D., Krienen, F.M., Sorensen, S.A., et al. (2021). Comparative cellular analysis of motor cortex in human, marmoset and mouse. Nature 598, 111–119. 10.1038/s41586-021-03465-8.
Franjic, D., Skarica, M., Ma, S., Arellano, J.I., Tebbenkamp, A.T.N., Choi, J., Xu, C., Li, Q., Morozov, Y.M., Andrijevic, D., et al. (2022). Transcriptomic taxonomy and neurogenic trajectories of adult human, macaque, and pig hippocampal and entorhinal cells. Neuron 110, 452–469.e14. 10.1016/j.neuron.2021.10.036.
In general, to finalize cell type annotation, we will subcluster glutamatergic and GABAergic neurons separately.
library(here)
library(tidyr)
library(dplyr)
library(purrr)
library(readr)
library(glue)
library(Seurat)
library(ggplot2)
library(Signac)
source(here("functions_r/scRNAseq.R"))
ggplot2::theme_set(theme_min())
merged_dir <- "/oak/stanford/groups/akundaje/projects/psychencode/outs/20231009_multiome_analysis/03-integration/merged_objects"
dlpfc <- readRDS(file.path(merged_dir, "DLPFC.integrated.batch.analyzed.rds"))
We manually assigned colors for each broad label, taken from the following studies to approximately match the color scheme used in the Allen Institute papers.
Conventions:
predicted.subclass
is the per-cell automated annotation
from Azimuthpredicted.cell_type_percluster
is the majority vote of
the per-cell label, within each cluster# load labels and color scheme
labels <- readr::read_tsv(
here("exps/20231009_multiome_analysis_v2/03-integration/label_map.tsv")) %>%
tibble::rowid_to_column(var = "Order")
## Rows: 60 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Class, Label, Color, Label_harmonized, Label_chrombpnet
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
palette_labels <- labels %>% dplyr::select(Label, Color) %>% tibble::deframe()
palette_labels_harmonized <- labels %>% distinct(Label_harmonized, Color) %>%
tibble::deframe()
labels
Idents(dlpfc) <- "seurat_clusters"
p0 <- plot_dr(dlpfc, color_by = "wsnn_batch_res.0.6", point_size = 0.2,
label = TRUE, reduction = "harm.wnn.umap_batch",
label_size = 5, legend = FALSE,
title = glue("DLPFC, n = {ncol(dlpfc)}")) + xlab("UMAP 1") + ylab("UMAP 2")
Idents(dlpfc) <- "predicted.subclass"
p1 <- plot_dr(dlpfc, color_by = "predicted.cell_type_percluster",
point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels, label_size = 5, legend = FALSE,
title = glue("DLPFC, n = {ncol(dlpfc)}")) + xlab("UMAP 1") +
ylab("UMAP 2")
p2 <- plot_dr(dlpfc, color_by = "predicted.subclass", point_size = 0.2,
label = TRUE, reduction = "harm.wnn.umap_batch",
colors = palette_labels, label_size = 5, legend = FALSE,
title = glue("DLPFC, n = {ncol(dlpfc)}"))
p0 + p1 + p2
dlpfc@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.cell_type_percluster), position = "fill") +
scale_fill_manual(values = palette_labels) +
ggtitle("DLPFC cluster breakdown by human cortex predictions (majority per cluster)")
dlpfc@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.subclass), position = "fill") +
scale_fill_manual(values = palette_labels) +
ggtitle("DLPFC cluster breakdown by human cortex predictions")
dlpfc@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.cell_type_percluster)) +
scale_fill_manual(values = palette_labels) +
ggtitle("DLPFC cluster breakdown by human cortex predictions (majority per cluster)")
dlpfc@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.subclass)) +
scale_fill_manual(values = palette_labels) +
ggtitle("DLPFC cluster breakdown by human cortex predictions")
# add in broader cell class from labels dataframe column
dlpfc@meta.data$predicted.cell_class <- dlpfc@meta.data$predicted.subclass
dlpfc@meta.data$predicted.cell_class <- plyr::mapvalues(
dlpfc@meta.data$predicted.cell_class, from = labels$Label, to = labels$Class)
## The following `from` values were not present in `x`: COP, Micro, Macro, Myeloid, T, aEndo, PC, vSMC, aSMC, VLMC_VSMC, InN VIP, InN NR2F2, InN SST, InN PVALB, InN LAMP5, InN LHX6, InN MEIS2, ExN, L2-5 IT, L2-6 IT, EC L2, EC L3, EC L56, EC L5, L6 CT and L6b, EC L6, EC L6b, DG GC, DG MC, DG, CA3, CA2, CA1 dorsal, CA1 ventral, CA1-3_and_subiculum, SUB distal, SUB proximal, SUB, CR RELN, Unknown
# get the most common cell class for each cluster
df_cluster_celltype <- dlpfc@meta.data %>%
dplyr::select(seurat_clusters, predicted.cell_class) %>%
group_by(seurat_clusters, predicted.cell_class) %>%
summarise(n = n()) %>%
top_n(1, n)
## `summarise()` has grouped output by 'seurat_clusters'. You can override using
## the `.groups` argument.
# add it back to the object
dlpfc@meta.data$predicted.cell_class_percluster <- plyr::mapvalues(
dlpfc@meta.data$seurat_clusters, from = df_cluster_celltype$seurat_clusters,
to = df_cluster_celltype$predicted.cell_class)
plot_dr(dlpfc, color_by = "predicted.subclass", point_size = 0.2, label = FALSE,
reduction = "harm.wnn.umap_batch", colors = palette_labels,
cells = WhichCells(dlpfc, expr = predicted.cell_class == "Glutamatergic"),
label_size = 5, legend = FALSE,
title = "DLPFC, highlighting glutamatergic cells")
dlpfc_glutamatergic <- subset(dlpfc, cells = WhichCells(
dlpfc, expr = predicted.cell_class_percluster == "Glutamatergic"))
table(dlpfc_glutamatergic$predicted.cell_class_percluster)
##
## Non-neuronal Glutamatergic GABAergic
## 0 43028 0
# call subcluster_seurat() function in functions_r/scRNAseq.R
dlpfc_glutamatergic <- subcluster_seurat(dlpfc_glutamatergic,
key = "Glutamatergic", resolution = 0.2)
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
cowplot::plot_grid(umap(dlpfc_glutamatergic, color_by = "predicted.subclass",
colors = palette_labels),
umap(dlpfc_glutamatergic,
color_by = "Clusters_subset_Glutamatergic"))
# added by James: bar plot for cell type proportion per cluster
dlpfc_glutamatergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.subclass), position = "fill") +
scale_fill_manual(values = palette_labels) +
ylab('proportion')+
ggtitle("dlpfc cluster breakdown for glutamatergic")
# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)
# get the most common cell class for each cluster
df_glutamatergic <- dlpfc_glutamatergic@meta.data %>%
dplyr::select(Clusters_subset_Glutamatergic, predicted.subclass) %>%
group_by(Clusters_subset_Glutamatergic, predicted.subclass) %>%
dplyr::summarise(n_per = dplyr::n()) %>%
top_n(1, n_per) %>%
# add suffixes to clusters with the same majority vote, only if there's more than one cluster
group_by(predicted.subclass) %>%
dplyr::mutate(n_per_label = dplyr::n()) %>%
dplyr::mutate(new_label = case_when(
n_per_label > 1 ~ paste0(predicted.subclass, "_", row_number()),
TRUE ~ predicted.subclass
))
## `summarise()` has grouped output by 'Clusters_subset_Glutamatergic'. You can
## override using the `.groups` argument.
df_glutamatergic
dlpfc_glutamatergic@meta.data$cell_type_final <- plyr::mapvalues(
dlpfc_glutamatergic@meta.data$Clusters_subset_Glutamatergic,
from = df_glutamatergic$Clusters_subset_Glutamatergic,
to = df_glutamatergic$new_label)
Idents(dlpfc_glutamatergic) <- "cell_type_final"
plot_dr(dlpfc_glutamatergic, color_by = "predicted.subclass", point_size = 0.2,
label = TRUE, reduction = "harm.wnn.umap_batch", colors = palette_labels,
label_size = 5, legend = TRUE,
title = glue("DLPFC glutamatergic neurons, final cell type labels, \nn = {ncol(dlpfc_glutamatergic)}"))
saveRDS(dlpfc_glutamatergic, file = file.path(
merged_dir, "DLPFC.subset_glutamatergic.seurat.Rds"))
# call markers on subsetted object
markers_glutamatergic <- FindAllMarkers(dlpfc_glutamatergic, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster L2/3 IT_1
## Calculating cluster L5 IT_1
## Calculating cluster L2/3 IT_2
## Calculating cluster L6 IT_1
## Calculating cluster L5 IT_2
## Calculating cluster L2/3 IT_3
## Calculating cluster L6b
## Calculating cluster L6 CT_1
## Calculating cluster L2/3 IT_4
## Calculating cluster L2/3 IT_5
## Calculating cluster L5/6 NP
## Calculating cluster L5 IT_3
## Calculating cluster L6 IT Car3
## Calculating cluster L5 IT_4
## Calculating cluster L6 IT_2
## Calculating cluster L5 IT_5
## Calculating cluster L5 ET
## Calculating cluster L6 CT_2
write_tsv(markers_glutamatergic, file.path(
merged_dir, "DLPFC.subset_glutamatergic.markers.tsv"))
top5 <- markers_glutamatergic %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = dlpfc_glutamatergic, downsample = 100),
features = top5$gene) + viridis::scale_fill_viridis()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
dlpfc_glutamatergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = condition), position = "fill")
dlpfc_glutamatergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = sample), position = "fill")
plot_dr(dlpfc, color_by = "predicted.subclass", point_size = 0.2, label = FALSE,
reduction = "harm.wnn.umap_batch", colors = palette_labels,
cells = WhichCells(dlpfc, expr = predicted.cell_class == "GABAergic"),
label_size = 5, legend = FALSE,
title = "DLPFC, highlighting GABAergic cells")
dlpfc_gabaergic <- subset(dlpfc, cells = WhichCells(
dlpfc, expr = predicted.cell_class_percluster == "GABAergic"))
table(dlpfc_gabaergic$predicted.cell_class_percluster)
##
## Non-neuronal Glutamatergic GABAergic
## 0 0 19066
dlpfc_gabaergic <- subcluster_seurat(dlpfc_gabaergic, key = "GABAergic")
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
cowplot::plot_grid(umap(dlpfc_gabaergic, color_by = "predicted.subclass",
colors = palette_labels), umap(
dlpfc_gabaergic,
color_by = "Clusters_subset_GABAergic"))
# added by James: bar plot for cell type percentage per cluster
dlpfc_gabaergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = predicted.subclass), position = "fill") +
scale_fill_manual(values = palette_labels) +
ylab('proportion')+
ggtitle("dlpfc cluster breakdown for gabaergic")
# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)
# get the most common cell class for each cluster
df_gabaergic <- dlpfc_gabaergic@meta.data %>%
dplyr::select(Clusters_subset_GABAergic, predicted.subclass) %>%
group_by(Clusters_subset_GABAergic, predicted.subclass) %>%
dplyr::summarise(n_per = dplyr::n()) %>%
top_n(1, n_per) %>%
# add suffixes to clusters with the same majority vote, only if there's more than one cluster
group_by(predicted.subclass) %>%
dplyr::mutate(n_per_label = dplyr::n()) %>%
dplyr::mutate(new_label = case_when(
n_per_label > 1 ~ paste0(predicted.subclass, "_", row_number()),
TRUE ~ predicted.subclass
))
## `summarise()` has grouped output by 'Clusters_subset_GABAergic'. You can
## override using the `.groups` argument.
df_gabaergic
dlpfc_gabaergic@meta.data$cell_type_final <- plyr::mapvalues(
dlpfc_gabaergic@meta.data$Clusters_subset_GABAergic,
from = df_gabaergic$Clusters_subset_GABAergic,
to = df_gabaergic$new_label)
Idents(dlpfc_gabaergic) <- "cell_type_final"
saveRDS(dlpfc_gabaergic, file = file.path(merged_dir,
"DLPFC.subset_gabaergic.seurat.Rds"))
plot_dr(dlpfc_gabaergic, color_by = "predicted.subclass", point_size = 0.2,
label = TRUE, reduction = "harm.wnn.umap_batch", colors = palette_labels,
label_size = 5, legend = TRUE,
title = glue("DLPFC GABAergic neurons, final cell type labels, \nn = {ncol(dlpfc_gabaergic)}"))
# call markers on subsetted object
markers_gabaergic <- FindAllMarkers(dlpfc_gabaergic, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster Pvalb_1
## Calculating cluster Sst_1
## Calculating cluster Vip_1
## Calculating cluster Vip_2
## Calculating cluster Sst_2
## Calculating cluster Lamp5_1
## Calculating cluster Sncg_1
## Calculating cluster Sst_3
## Calculating cluster Pvalb_2
## Calculating cluster Lamp5_2
## Calculating cluster Sncg_2
## Calculating cluster Sst_4
## Calculating cluster Sst_5
## Calculating cluster Sst Chodl
## Calculating cluster Pvalb_3
write_tsv(markers_gabaergic, file.path(merged_dir,
"DLPFC.subset_gabaergic.markers.tsv"))
top5 <- markers_gabaergic %>%
group_by(cluster) %>%
slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = dlpfc_gabaergic, downsample = 100),
features = top5$gene) + viridis::scale_fill_viridis()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
dlpfc_gabaergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = condition), position = "fill")
dlpfc_gabaergic@meta.data %>%
ggplot(aes(x = seurat_clusters)) +
geom_bar(aes(fill = sample), position = "fill")
Specify certain subclusters to label as “unknown” due to lack of clear markers or mixed annotations.
# define clusters to drop, then convert the column back to character; and replace those labels
Idents(dlpfc_glutamatergic) <- "seurat_clusters"
clusters_unk_glut <- c("3", "4", "14")
cells_unk_glut <- WhichCells(dlpfc_glutamatergic, idents = clusters_unk_glut)
dlpfc_glutamatergic$cell_type_final <- as.character(dlpfc_glutamatergic$cell_type_final)
dlpfc_glutamatergic$cell_type_final[cells_unk_glut] <- "Unknown"
Idents(dlpfc_gabaergic) <- "seurat_clusters"
clusters_unk_gaba <- c("13", "14", "15")
cells_unk_gaba <- WhichCells(dlpfc_gabaergic, idents = clusters_unk_gaba)
dlpfc_gabaergic$cell_type_final <- as.character(dlpfc_gabaergic$cell_type_final)
dlpfc_gabaergic$cell_type_final[cells_unk_gaba] <- "Unknown"
# as a default, set the final cell type to the majority cell type per cluster
dlpfc$cell_type_final <- as.character(dlpfc$predicted.cell_type_percluster)
dlpfc@meta.data$Clusters_all <- dlpfc@meta.data$seurat_clusters
dlpfc@meta.data$Clusters_subset_glutamatergic <- NA
dlpfc@meta.data$Clusters_subset_GABAergic <- NA
# add glutamatergic subclusters into main object
cells_glut <- Cells(dlpfc_glutamatergic)
dlpfc@meta.data[cells_glut, ]$Clusters_subset_glutamatergic <- dlpfc_glutamatergic@meta.data$Clusters_subset_Glutamatergic
dlpfc@meta.data[cells_glut, ]$cell_type_final <- as.character(
dlpfc_glutamatergic@meta.data$cell_type_final)
# add gabaergic subclusters into main object
cells_gab <- Cells(dlpfc_gabaergic)
dlpfc@meta.data[cells_gab, ]$Clusters_subset_GABAergic <- dlpfc_gabaergic@meta.data$Clusters_subset_GABAergic
dlpfc@meta.data[cells_gab, ]$cell_type_final <- as.character(
dlpfc_gabaergic@meta.data$cell_type_final)
# relabel with harmonized cell type labels
label_map_new <- data.frame(Label_granular = unique(dlpfc@meta.data$cell_type_final)) %>%
mutate(Label_granular_old = Label_granular) %>%
separate(Label_granular, into = c("Label_wo_suffix", "suffix"), sep = "_") %>%
left_join(labels, by = c("Label_wo_suffix" = "Label")) %>%
mutate(Label_granular = case_when(
!is.na(suffix) ~ paste0(Label_harmonized, "_", suffix),
TRUE ~ Label_harmonized
)) %>%
arrange(Order)
dlpfc@meta.data$cell_type_granular <- plyr::mapvalues(dlpfc@meta.data$cell_type_final,
from = label_map_new$Label_granular_old,
to = label_map_new$Label_granular)
dlpfc@meta.data$cell_type_broad <- plyr::mapvalues(dlpfc@meta.data$cell_type_final,
from = label_map_new$Label_granular_old,
to = label_map_new$Label_harmonized)
dlpfc@meta.data$cell_type_final <- NULL
# put the labels in the cell type order from our label map, to get a rational order
dlpfc_order <- label_map_new %>%
dplyr::filter(Label_granular %in% dlpfc$cell_type_granular) %>%
pull(Label_granular) %>% base::unique()
dlpfc_order
## [1] "OPC" "Oligo"
## [3] "Astro" "Microglia PVM"
## [5] "Endo" "Vascular leptomeningeal"
## [7] "VIP_1" "VIP_2"
## [9] "SST_4" "SST_2"
## [11] "SST_1" "SST_3"
## [13] "PVALB_1" "PVALB_2"
## [15] "LAMP5_2" "LAMP5_1"
## [17] "SNCG_2" "SNCG_1"
## [19] "L23 IT_1" "L23 IT_3"
## [21] "L23 IT_4" "L23 IT_5"
## [23] "L5 IT_1" "L5 IT_3"
## [25] "L5 IT_2" "L5 IT_5"
## [27] "L5 ET" "L56 NP"
## [29] "L6b" "L6 IT_2"
## [31] "L6 IT CAR3" "L6 CT_1"
## [33] "L6 CT_2" "Unknown"
dlpfc_order_broad <- labels %>%
dplyr::filter(Label_harmonized %in% unique(dlpfc$cell_type_broad)) %>%
pull(Label_harmonized) %>%
base::unique()
dlpfc_order_broad
## [1] "OPC" "Oligo"
## [3] "Astro" "Microglia PVM"
## [5] "Endo" "Vascular leptomeningeal"
## [7] "VIP" "SST"
## [9] "PVALB" "LAMP5"
## [11] "SNCG" "L23 IT"
## [13] "L5 IT" "L5 ET"
## [15] "L56 NP" "L6b"
## [17] "L6 IT" "L6 IT CAR3"
## [19] "L6 CT" "Unknown"
# put the labels in the cell type order from our label map, to get a rational order
dlpfc$cell_type_broad <- factor(dlpfc$cell_type_broad, levels = dlpfc_order_broad)
dlpfc$cell_type_granular <- factor(dlpfc$cell_type_granular, levels = dlpfc_order)
Idents(dlpfc) <- "cell_type_granular"
plot_dr(dlpfc, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
label_size = 5, legend = TRUE,
title = glue("DLPFC AD, final cell type labels, n = {ncol(dlpfc)}"))
Idents(dlpfc) <- "cell_type_broad"
plot_dr(dlpfc, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
label_size = 5, legend = TRUE,
title = glue("DLPFC, final cell type labels, broad, n = {ncol(dlpfc)}"))
Plot cluster numbers:
dlpfc@meta.data %>%
group_by(cell_type_broad, cell_type_granular) %>%
count() %>%
ggplot(aes(x = cell_type_granular, y = n)) +
geom_bar(aes(fill = cell_type_broad), stat = "identity") +
geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
scale_fill_manual(values = palette_labels_harmonized) +
scale_color_manual(values = palette_labels_harmonized) +
rotate_x() +
ggtitle("Number of cells per granular cluster")
dlpfc@meta.data %>%
group_by(cell_type_broad) %>%
count() %>%
ggplot(aes(x = cell_type_broad, y = log10(n))) +
geom_bar(aes(fill = cell_type_broad), stat = "identity") +
geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
scale_fill_manual(values = palette_labels_harmonized) +
scale_color_manual(values = palette_labels_harmonized) +
rotate_x() +
ggtitle("Number of cells per broad cluster")
dlpfc@meta.data %>%
group_by(cell_type_broad) %>%
summarize(sum_ncount = sum(nCount_ATAC)) %>%
ggplot(aes(x = cell_type_broad, y = log10(sum_ncount))) +
geom_bar(aes(fill = cell_type_broad), stat = "identity") +
geom_text(aes(label = sum_ncount, color = cell_type_broad), hjust = -0.1,
angle = 90) +
scale_fill_manual(values = palette_labels_harmonized) +
scale_color_manual(values = palette_labels_harmonized) +
rotate_x() +
ggtitle("Number of fragments in peaks, per broad cluster") +
coord_cartesian(ylim = c(0, 10))
DotPlot(dlpfc, assay = "RNA", features = rev(c(
"PDGFRA",
"PLP1",
"AQP4",
"LY86",
"VWF",
"GAD1",
"VIP",
"SST",
"PVALB",
"LAMP5",
"SLC17A8",
"SLC17A7",
"CUX2",
# "FREM3",
"RORB",
"FEZF2",
"FOXP2",
"NXPH3",
"SULF1"
))) +
ylab("Cell type") +
coord_flip() +
rotate_x()
DotPlot(dlpfc, assay = "RNA", group.by = "cell_type_broad", features = rev(c(
"PDGFRA",
"PLP1",
"AQP4",
"LY86",
"VWF",
"COL1A2",
"GAD1",
"VIP",
"SST",
"PVALB",
"LAMP5",
"SLC17A8",
"SLC17A7",
"CUX2",
"LHX2",
"RORB",
"FEZF2",
"FOXP2",
"TLE4",
"NXPH3",
"SULF1"
))) +
ylab("Cell type") +
coord_flip() +
rotate_x()
# we can reload labels for later analysis.
dlpfc_meta <- dlpfc@meta.data
saveRDS(dlpfc_meta, file = file.path(
merged_dir, "DLPFC.integrated.batch.analyzed.cell_metadata.labeled.rds"))
Idents(dlpfc) <- "cell_type_broad"
markers_dlpfc <- FindAllMarkers(dlpfc, only.pos = TRUE, logfc.threshold = 0.25,
max.cells.per.ident = 1000) %>%
mutate(pct.diff = pct.1 - pct.2)
## Calculating cluster OPC
## Calculating cluster Oligo
## Calculating cluster Astro
## Calculating cluster Microglia PVM
## Calculating cluster Endo
## Calculating cluster Vascular leptomeningeal
## Calculating cluster VIP
## Calculating cluster SST
## Calculating cluster PVALB
## Calculating cluster LAMP5
## Calculating cluster SNCG
## Calculating cluster L23 IT
## Calculating cluster L5 IT
## Calculating cluster L5 ET
## Calculating cluster L56 NP
## Calculating cluster L6b
## Calculating cluster L6 IT
## Calculating cluster L6 IT CAR3
## Calculating cluster L6 CT
## Calculating cluster Unknown
write_tsv(markers_dlpfc, file.path(merged_dir,
"DLPFC.markers.cell_type_broad.tsv"))
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/libf77blas.so.3.10.3
## LAPACK: /users/guozy/miniconda3/envs/psych/lib/libopenblasp-r0.3.27.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 viridis_0.6.4 viridisLite_0.4.2 Signac_1.9.0
## [5] ggplot2_3.5.1 SeuratObject_5.0.0 Seurat_4.3.0 glue_1.6.2
## [9] readr_2.1.4 purrr_1.0.2 dplyr_1.1.2 tidyr_1.3.0
## [13] here_1.0.1 Cairo_1.6-2
##
## loaded via a namespace (and not attached):
## [1] spam_2.9-1 fastmatch_1.1-4 plyr_1.8.9
## [4] igraph_2.0.3 lazyeval_0.2.2 sp_2.1-4
## [7] splines_4.2.0 BiocParallel_1.32.6 listenv_0.9.1
## [10] scattermore_1.2 GenomeInfoDb_1.34.9 digest_0.6.35
## [13] htmltools_0.5.8.1 fansi_1.0.6 magrittr_2.0.3
## [16] tensor_1.5 cluster_2.1.6 ROCR_1.0-11
## [19] limma_3.54.2 tzdb_0.4.0 globals_0.16.3
## [22] Biostrings_2.66.0 matrixStats_1.0.0 vroom_1.6.5
## [25] spatstat.sparse_3.0-3 colorspace_2.1-0 rappdirs_0.3.3
## [28] ggrepel_0.9.5 xfun_0.44 crayon_1.5.2
## [31] RCurl_1.98-1.14 jsonlite_1.8.8 progressr_0.14.0
## [34] spatstat.data_3.0-4 survival_3.6-4 zoo_1.8-12
## [37] polyclip_1.10-6 gtable_0.3.5 zlibbioc_1.44.0
## [40] XVector_0.38.0 leiden_0.4.3.1 future.apply_1.11.2
## [43] BiocGenerics_0.44.0 abind_1.4-5 scales_1.3.0
## [46] spatstat.random_3.2-3 miniUI_0.1.1.1 Rcpp_1.0.11
## [49] xtable_1.8-4 reticulate_1.37.0 bit_4.0.5
## [52] dotCall64_1.1-1 stats4_4.2.0 htmlwidgets_1.6.4
## [55] httr_1.4.7 ica_1.0-3 farver_2.1.2
## [58] pkgconfig_2.0.3 sass_0.4.9 uwot_0.2.2
## [61] deldir_2.0-4 utf8_1.2.4 labeling_0.4.3
## [64] tidyselect_1.2.1 rlang_1.1.4 reshape2_1.4.4
## [67] later_1.3.2 munsell_0.5.1 tools_4.2.0
## [70] cachem_1.1.0 cli_3.6.2 generics_0.1.3
## [73] ggridges_0.5.6 evaluate_0.24.0 stringr_1.5.1
## [76] fastmap_1.2.0 yaml_2.3.8 goftest_1.2-3
## [79] bit64_4.0.5 knitr_1.47 fitdistrplus_1.1-11
## [82] RANN_2.6.1 pbapply_1.7-2 future_1.33.2
## [85] nlme_3.1-164 mime_0.12 RcppRoll_0.3.0
## [88] compiler_4.2.0 plotly_4.10.4 png_0.1-8
## [91] spatstat.utils_3.0-4 tibble_3.2.1 bslib_0.7.0
## [94] stringi_1.8.4 highr_0.11 lattice_0.22-6
## [97] Matrix_1.6-1.1 vctrs_0.6.5 pillar_1.9.0
## [100] lifecycle_1.0.4 spatstat.geom_3.2-9 lmtest_0.9-40
## [103] jquerylib_0.1.4 RcppAnnoy_0.0.22 data.table_1.14.8
## [106] cowplot_1.1.3 bitops_1.0-7 irlba_2.3.5.1
## [109] httpuv_1.6.15 patchwork_1.2.0 GenomicRanges_1.50.2
## [112] R6_2.5.1 promises_1.3.0 KernSmooth_2.23-20
## [115] gridExtra_2.3 IRanges_2.32.0 parallelly_1.37.1
## [118] codetools_0.2-20 MASS_7.3-56 rprojroot_2.0.4
## [121] withr_3.0.0 sctransform_0.4.0 Rsamtools_2.14.0
## [124] S4Vectors_0.36.2 GenomeInfoDbData_1.2.9 parallel_4.2.0
## [127] hms_1.1.3 grid_4.2.0 rmarkdown_2.27
## [130] Rtsne_0.17 spatstat.explore_3.2-7 shiny_1.8.1.1