exp_id <- "20231009_multiome_analysis/03-integration" # determines the name of 
# the cache folder
doc_id <- "05a" # determines name of the subfolder of `figures` where 
# pngs/pdfs are saved
out <- here::here("outs", exp_id, doc_id, "/"); dir.create(out, recursive = TRUE)
figout <- here::here("outs", exp_id, doc_id, "figures/")
cache <- file.path("~/scratch/cache", exp_id, doc_id, "/")

1 Overview

Here, we’ll assemble the final annotations for each dataset. Consulting several resources:

Hodge, R.D., Bakken, T.E., Miller, J.A., Smith, K.A., Barkan, E.R., Graybuck, L.T., Close, J.L., Long, B., Johansen, N., Penn, O., et al. (2019). Conserved cell types with divergent features in human versus mouse cortex. Nature 573, 61–68. 10.1038/s41586-019-1506-7.

Bakken, T.E., Jorstad, N.L., Hu, Q., Lake, B.B., Tian, W., Kalmbach, B.E., Crow, M., Hodge, R.D., Krienen, F.M., Sorensen, S.A., et al. (2021). Comparative cellular analysis of motor cortex in human, marmoset and mouse. Nature 598, 111–119. 10.1038/s41586-021-03465-8.

Franjic, D., Skarica, M., Ma, S., Arellano, J.I., Tebbenkamp, A.T.N., Choi, J., Xu, C., Li, Q., Morozov, Y.M., Andrijevic, D., et al. (2022). Transcriptomic taxonomy and neurogenic trajectories of adult human, macaque, and pig hippocampal and entorhinal cells. Neuron 110, 452–469.e14. 10.1016/j.neuron.2021.10.036.

In general, to finalize cell type annotation, we will subcluster glutamatergic and GABAergic neurons separately.

2 Set up

library(here)
library(tidyr)
library(dplyr)
library(purrr)
library(readr)
library(glue)
library(Seurat)
library(ggplot2)
library(Signac)

source(here("functions_r/scRNAseq.R"))

ggplot2::theme_set(theme_min())

3 Load data

merged_dir <- "/oak/stanford/groups/akundaje/projects/psychencode/outs/20231009_multiome_analysis/03-integration/merged_objects/"
dacc <- readRDS(file.path(merged_dir, "dACC.integrated.batch.analyzed.rds"))

4 Cell type annotation

We manually assigned colors for each broad label, taken from the following studies to approximately match the color scheme used in the Allen Institute papers.

Conventions:

# load labels and color palette
labels <- readr::read_tsv(
  here("exps/20231009_multiome_analysis_v2/03-integration/label_map.tsv")) %>% 
  tibble::rowid_to_column(var = "Order")
## Rows: 60 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Class, Label, Color, Label_harmonized, Label_chrombpnet
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
palette_labels <- labels %>% dplyr::select(Label, Color) %>% tibble::deframe()
palette_labels_harmonized <- labels %>% distinct(Label_harmonized, Color) %>% 
  tibble::deframe()

labels
Idents(dacc) <- "seurat_clusters"
p0 <- plot_dr(dacc, color_by = "wsnn_batch_res.0.6", point_size = 0.2, 
              label = TRUE, reduction = "harm.wnn.umap_batch",
              label_size = 5, legend = FALSE,
              title = glue("DACC, n = {ncol(dacc)}")) + xlab("UMAP 1") + 
  ylab("UMAP 2")

Idents(dacc) <- "predicted.subclass"
p1 <- plot_dr(dacc, color_by = "predicted.cell_type_percluster", 
              point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch", 
              colors = palette_labels,
              label_size = 5, legend = FALSE,
              title = glue("DACC, n = {ncol(dacc)}")) + xlab("UMAP 1") + 
  ylab("UMAP 2")

p2 <- plot_dr(dacc, color_by = "predicted.subclass", point_size = 0.2, 
              label = TRUE, reduction = "harm.wnn.umap_batch", 
              colors = palette_labels,
              label_size = 5, legend = FALSE,
              title = glue("DACC, n = {ncol(dacc)}"))

p0 + p1 + p2

dacc@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.cell_type_percluster), position = "fill") +
  scale_fill_manual(values = palette_labels) +
  ggtitle("DACC cluster breakdown by human cortex predictions (majority per cluster)")

dacc@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.subclass), position = "fill") +
  scale_fill_manual(values = palette_labels) +
  ggtitle("DACC cluster breakdown by human cortex predictions")

dacc@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.cell_type_percluster)) +
  scale_fill_manual(values = palette_labels) +
  ggtitle("DACC cluster breakdown by human cortex predictions (majority per cluster)")

dacc@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.subclass)) +
  scale_fill_manual(values = palette_labels) +
  ggtitle("DACC cluster breakdown by human cortex predictions")

# add in broader cell class from labels dataframe column
dacc@meta.data$predicted.cell_class <- dacc@meta.data$predicted.subclass
dacc@meta.data$predicted.cell_class <- plyr::mapvalues(
  dacc@meta.data$predicted.cell_class, from = labels$Label, to = labels$Class)
## The following `from` values were not present in `x`: COP, Micro, Macro, Myeloid, T, aEndo, PC, vSMC, aSMC, VLMC_VSMC, InN VIP, InN NR2F2, InN SST, InN PVALB, InN LAMP5, InN LHX6, InN MEIS2, ExN, L2-5 IT, L2-6 IT, EC L2, EC L3, EC L56, EC L5, L6 CT and L6b, EC L6, EC L6b, DG GC, DG MC, DG, CA3, CA2, CA1 dorsal, CA1 ventral, CA1-3_and_subiculum, SUB distal, SUB proximal, SUB, CR RELN, Unknown
# get the most common cell class for each cluster
df_cluster_celltype <- dacc@meta.data %>% dplyr::select(
  seurat_clusters, predicted.cell_class) %>% 
  group_by(seurat_clusters, predicted.cell_class) %>%
  summarise(n = n()) %>% 
  top_n(1, n)
## `summarise()` has grouped output by 'seurat_clusters'. You can override using
## the `.groups` argument.
# add it back to the object
dacc@meta.data$predicted.cell_class_percluster <- plyr::mapvalues(
  dacc@meta.data$seurat_clusters, from = df_cluster_celltype$seurat_clusters, 
  to = df_cluster_celltype$predicted.cell_class)

4.1 Glutamatergic

plot_dr(dacc, color_by = "predicted.subclass", point_size = 0.2, label = FALSE, 
        reduction = "harm.wnn.umap_batch", colors = palette_labels,
        cells = WhichCells(dacc, expr = predicted.cell_class == "Glutamatergic"),
        label_size = 5, legend = FALSE,
        title = "dACC, highlighting glutamatergic cells")

dacc_glutamatergic <- subset(dacc, cells = WhichCells(
  dacc, expr = predicted.cell_class_percluster == "Glutamatergic"))
table(dacc_glutamatergic$predicted.cell_class_percluster)
## 
##  Non-neuronal Glutamatergic     GABAergic 
##             0         21139             0
# call subcluster_seurat() function in functions_r/scRNAseq.R
dacc_glutamatergic <- subcluster_seurat(dacc_glutamatergic, 
                                        key = "Glutamatergic", 
                                        resolution = 0.2)
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
cowplot::plot_grid(umap(dacc_glutamatergic, color_by = "predicted.subclass", 
                        colors = palette_labels),
                   umap(dacc_glutamatergic, 
                        color_by = "Clusters_subset_Glutamatergic"))

# added by James: bar plot for cell type proportion per cluster
dacc_glutamatergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.subclass), position = "fill") +
  scale_fill_manual(values = palette_labels) +
  ylab('proportion')+
  ggtitle("dacc cluster breakdown for glutamatergic")

# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)

# get the most common cell class for each cluster
df_glutamatergic <- dacc_glutamatergic@meta.data %>% 
  dplyr::select(Clusters_subset_Glutamatergic, predicted.subclass) %>% 
  group_by(Clusters_subset_Glutamatergic, predicted.subclass) %>%
  dplyr::summarise(n_per = dplyr::n()) %>% 
  top_n(1, n_per) %>%
  # add suffixes to clusters with the same majority vote, only if there's more than one cluster
  group_by(predicted.subclass) %>%
  dplyr::mutate(n_per_label = dplyr::n()) %>% 
  dplyr::mutate(new_label = case_when(
    n_per_label > 1 ~ paste0(predicted.subclass, "_", row_number()),
    TRUE ~ predicted.subclass
  ))
## `summarise()` has grouped output by 'Clusters_subset_Glutamatergic'. You can
## override using the `.groups` argument.
df_glutamatergic
dacc_glutamatergic@meta.data$cell_type_final <- plyr::mapvalues(
  dacc_glutamatergic@meta.data$Clusters_subset_Glutamatergic, 
  from = df_glutamatergic$Clusters_subset_Glutamatergic, 
  to = df_glutamatergic$new_label)

Idents(dacc_glutamatergic) <- "cell_type_final"
plot_dr(dacc_glutamatergic, color_by = "predicted.subclass", point_size = 0.2, 
        label = TRUE, reduction = "harm.wnn.umap_batch",
        colors = palette_labels,
        label_size = 5, legend = TRUE,
        title = glue("dACC glutamatergic neurons, final cell type labels, \nn = {ncol(dacc_glutamatergic)}"))

saveRDS(dacc_glutamatergic, file = file.path(
  merged_dir, "dACC.subset_glutamatergic.seurat.Rds"))
# call markers on subsetted object
markers_glutamatergic <- FindAllMarkers(dacc_glutamatergic, only.pos = TRUE, 
                                        min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster L2/3 IT_1
## Calculating cluster L2/3 IT_2
## Calculating cluster L5 IT_1
## Calculating cluster L6 CT_1
## Calculating cluster L2/3 IT_3
## Calculating cluster L2/3 IT_4
## Calculating cluster L6b
## Calculating cluster L5 IT_2
## Calculating cluster L5/6 NP
## Calculating cluster L2/3 IT_5
## Calculating cluster L2/3 IT_6
## Calculating cluster L6 IT Car3
## Calculating cluster L5 IT_3
## Calculating cluster L5 ET
## Calculating cluster L5 IT_4
## Calculating cluster L6 CT_2
write_tsv(markers_glutamatergic, file.path(
  merged_dir, "dACC.subset_glutamatergic.markers.tsv"))
top5 <- markers_glutamatergic %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = dacc_glutamatergic, downsample = 100), 
          features = top5$gene) + viridis::scale_fill_viridis()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

dacc_glutamatergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = condition), position = "fill")

dacc_glutamatergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = sample), position = "fill")

4.2 GABAergic

plot_dr(dacc, color_by = "predicted.subclass", point_size = 0.2, label = FALSE, 
        reduction = "harm.wnn.umap_batch", colors = palette_labels,
        cells = WhichCells(dacc, expr = predicted.cell_class == "GABAergic"),
        label_size = 5, legend = FALSE,
        title = "dACC, highlighting GABAergic cells")

dacc_gabaergic <- subset(dacc, cells = WhichCells(
  dacc, expr = predicted.cell_class_percluster == "GABAergic"))
table(dacc_gabaergic$predicted.cell_class_percluster)
## 
##  Non-neuronal Glutamatergic     GABAergic 
##             0             0         11949
dacc_gabaergic <- subcluster_seurat(dacc_gabaergic, key = "GABAergic")
## Performing TF-IDF normalization
## Running SVD
## Scaling cell embeddings
cowplot::plot_grid(umap(dacc_gabaergic, color_by = "predicted.subclass", 
                        colors = palette_labels), umap(
                          dacc_gabaergic, color_by = "Clusters_subset_GABAergic"))

# added by James: bar plot for cell type percentage per cluster
dacc_gabaergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.subclass), position = "fill") +
  scale_fill_manual(values = palette_labels) +
  ylab('proportion')+
  ggtitle("dacc cluster breakdown for gabaergic")

# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)

# get the most common cell class for each cluster
df_gabaergic <- dacc_gabaergic@meta.data %>% dplyr::select(
  Clusters_subset_GABAergic, predicted.subclass) %>% 
  group_by(Clusters_subset_GABAergic, predicted.subclass) %>%
  dplyr::summarise(n_per = dplyr::n()) %>% 
  top_n(1, n_per) %>%
  # add suffixes to clusters with the same majority vote, only if there's more than one cluster
  group_by(predicted.subclass) %>%
  dplyr::mutate(n_per_label = dplyr::n()) %>% 
  dplyr::mutate(new_label = case_when(
    n_per_label > 1 ~ paste0(predicted.subclass, "_", row_number()),
    TRUE ~ predicted.subclass
  ))
## `summarise()` has grouped output by 'Clusters_subset_GABAergic'. You can
## override using the `.groups` argument.
df_gabaergic
dacc_gabaergic@meta.data$cell_type_final <- plyr::mapvalues(
  dacc_gabaergic@meta.data$Clusters_subset_GABAergic, 
  from = df_gabaergic$Clusters_subset_GABAergic, 
  to = df_gabaergic$new_label)

Idents(dacc_gabaergic) <- "cell_type_final"
saveRDS(dacc_gabaergic, file = file.path(merged_dir, 
                                         "dACC.subset_gabaergic.seurat.Rds"))
plot_dr(dacc_gabaergic, color_by = "predicted.subclass", point_size = 0.2, 
        label = TRUE, reduction = "harm.wnn.umap_batch", colors = palette_labels,
        label_size = 5, legend = TRUE,
        title = glue("dACC GABAergic neurons, final cell type labels, \nn = {ncol(dacc_gabaergic)}"))

# call markers on subsetted object
markers_gabaergic <- FindAllMarkers(dacc_gabaergic, only.pos = TRUE, 
                                    min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster Sst_1
## Calculating cluster Vip
## Calculating cluster Pvalb_1
## Calculating cluster Sncg_1
## Calculating cluster Lamp5_1
## Calculating cluster Sst_2
## Calculating cluster Sncg_2
## Calculating cluster Lamp5_2
## Calculating cluster Pvalb_2
## Calculating cluster Pvalb_3
## Calculating cluster Sst_3
## Calculating cluster Lamp5_3
## Calculating cluster Sst Chodl
write_tsv(markers_gabaergic, file.path(merged_dir, 
                                       "dACC.subset_gabaergic.markers.tsv"))
top5 <- markers_gabaergic %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = dacc_gabaergic, downsample = 100), 
          features = top5$gene) + viridis::scale_fill_viridis()
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

dacc_gabaergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = condition), position = "fill")

dacc_gabaergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = sample), position = "fill")

4.3 Assemble final annotations

Specify certain subclusters to label as “unknown” due to lack of clear markers or mixed annotations.

# define clusters to drop, then convert the column back to character; and replace those labels
Idents(dacc_glutamatergic) <- "seurat_clusters"
clusters_unk_glut <- c("2", "5")
cells_unk_glut <- WhichCells(dacc_glutamatergic, idents = clusters_unk_glut)
dacc_glutamatergic$cell_type_final <- as.character(dacc_glutamatergic$cell_type_final)
dacc_glutamatergic$cell_type_final[cells_unk_glut] <- "Unknown"

Idents(dacc_gabaergic) <- "seurat_clusters"
clusters_unk_gaba <- c("11")
cells_unk_gaba <- WhichCells(dacc_gabaergic, idents = clusters_unk_gaba)
dacc_gabaergic$cell_type_final <- as.character(dacc_gabaergic$cell_type_final)
dacc_gabaergic$cell_type_final[cells_unk_gaba] <- "Unknown"
# as a default, set the final cell type to the majority cell type per cluster
dacc$cell_type_final <- as.character(dacc$predicted.cell_type_percluster)

dacc@meta.data$Clusters_all <- dacc@meta.data$seurat_clusters
dacc@meta.data$Clusters_subset_glutamatergic <- NA
dacc@meta.data$Clusters_subset_GABAergic <- NA

# add glutamatergic subclusters into main object
cells_glut <- Cells(dacc_glutamatergic)
dacc@meta.data[cells_glut, ]$Clusters_subset_glutamatergic <- 
  dacc_glutamatergic@meta.data$Clusters_subset_Glutamatergic
dacc@meta.data[cells_glut, ]$cell_type_final <- 
  as.character(dacc_glutamatergic@meta.data$cell_type_final)

# add gabaergic subclusters into main object
cells_gab <- Cells(dacc_gabaergic)
dacc@meta.data[cells_gab, ]$Clusters_subset_GABAergic <- 
  dacc_gabaergic@meta.data$Clusters_subset_GABAergic
dacc@meta.data[cells_gab, ]$cell_type_final <- 
  as.character(dacc_gabaergic@meta.data$cell_type_final)
# relabel with harmonized cell type labels
label_map_new <- data.frame(Label_granular = unique(dacc@meta.data$cell_type_final)) %>% 
  mutate(Label_granular_old = Label_granular) %>% 
  separate(Label_granular, into = c("Label_wo_suffix", "suffix"), sep = "_") %>% 
  left_join(labels, by = c("Label_wo_suffix" = "Label")) %>% 
  mutate(Label_granular = case_when(
    !is.na(suffix) ~ paste0(Label_harmonized, "_", suffix),
    TRUE ~ Label_harmonized
  )) %>% 
  arrange(Order)

dacc@meta.data$cell_type_granular <- plyr::mapvalues(
  dacc@meta.data$cell_type_final, 
  from = label_map_new$Label_granular_old, 
  to = label_map_new$Label_granular)

dacc@meta.data$cell_type_broad <- plyr::mapvalues(
  dacc@meta.data$cell_type_final, 
  from = label_map_new$Label_granular_old, 
  to = label_map_new$Label_harmonized)

dacc@meta.data$cell_type_final <- NULL

# put the labels in the cell type order from our label map, to get a rational order
dacc_order <- label_map_new %>%
  dplyr::filter(Label_granular %in% dacc$cell_type_granular) %>%
  pull(Label_granular) %>% base::unique()
dacc_order
##  [1] "OPC"                     "Oligo"                  
##  [3] "Astro"                   "Microglia PVM"          
##  [5] "Endo"                    "Vascular leptomeningeal"
##  [7] "VIP"                     "SST_2"                  
##  [9] "SST_1"                   "SST CHODL"              
## [11] "PVALB_1"                 "PVALB_2"                
## [13] "PVALB_3"                 "LAMP5_2"                
## [15] "LAMP5_1"                 "LAMP5_3"                
## [17] "SNCG_2"                  "SNCG_1"                 
## [19] "L23 IT_1"                "L23 IT_6"               
## [21] "L23 IT_5"                "L23 IT_4"               
## [23] "L5 IT_1"                 "L5 IT_2"                
## [25] "L5 IT_4"                 "L5 IT_3"                
## [27] "L5 ET"                   "L56 NP"                 
## [29] "L6b"                     "L6 IT CAR3"             
## [31] "L6 CT_1"                 "L6 CT_2"                
## [33] "Unknown"
dacc_order_broad <- labels %>%
  dplyr::filter(Label_harmonized %in% unique(dacc$cell_type_broad)) %>%
  pull(Label_harmonized) %>%
  base::unique()
dacc_order_broad
##  [1] "OPC"                     "Oligo"                  
##  [3] "Astro"                   "Microglia PVM"          
##  [5] "Endo"                    "Vascular leptomeningeal"
##  [7] "VIP"                     "SST"                    
##  [9] "SST CHODL"               "PVALB"                  
## [11] "LAMP5"                   "SNCG"                   
## [13] "L23 IT"                  "L5 IT"                  
## [15] "L5 ET"                   "L56 NP"                 
## [17] "L6b"                     "L6 IT CAR3"             
## [19] "L6 CT"                   "Unknown"
# put the labels in the cell type order from our label map, to get a rational order
dacc$cell_type_broad <- factor(dacc$cell_type_broad, levels = dacc_order_broad)
dacc$cell_type_granular <- factor(dacc$cell_type_granular, levels = dacc_order)
Idents(dacc) <- "cell_type_granular"

plot_dr(dacc, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
        reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
        label_size = 5, legend = TRUE,
        title = glue("dACC, final cell type labels, granular, n = {ncol(dacc)}"))

Idents(dacc) <- "cell_type_broad"
plot_dr(dacc, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
        reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
        label_size = 5, legend = TRUE,
        title = glue("dACC, final cell type labels, broad, n = {ncol(dacc)}"))

Plot cluster numbers:

dacc@meta.data %>% 
  group_by(cell_type_broad, cell_type_granular) %>% 
  count() %>% 
  ggplot(aes(x = cell_type_granular, y = n)) +
  geom_bar(aes(fill = cell_type_broad), stat = "identity") +
  geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
  scale_fill_manual(values = palette_labels_harmonized) +
  scale_color_manual(values = palette_labels_harmonized) +
  rotate_x() +
  ggtitle("Number of cells per granular cluster")

dacc@meta.data %>% 
  group_by(cell_type_broad) %>% 
  count() %>% 
  ggplot(aes(x = cell_type_broad, y = log10(n))) +
  geom_bar(aes(fill = cell_type_broad), stat = "identity") +
  geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
  scale_fill_manual(values = palette_labels_harmonized) +
  scale_color_manual(values = palette_labels_harmonized) +
  rotate_x() +
  ggtitle("Number of cells per broad cluster")

dacc@meta.data %>% 
  group_by(cell_type_broad) %>% 
  summarize(sum_ncount = sum(nCount_ATAC)) %>% 
  ggplot(aes(x = cell_type_broad, y = log10(sum_ncount))) +
  geom_bar(aes(fill = cell_type_broad), stat = "identity") +
  geom_text(aes(label = sum_ncount, color = cell_type_broad), hjust = -0.1, 
            angle = 90) +
  scale_fill_manual(values = palette_labels_harmonized) +
  scale_color_manual(values = palette_labels_harmonized) +
  rotate_x() +
  ggtitle("Number of fragments in peaks, per broad cluster") +
  coord_cartesian(ylim = c(0, 10))

DotPlot(dacc, assay = "RNA", group.by = "cell_type_broad", features = rev(c(
  "PDGFRA",
  "PLP1",
  "AQP4",
  "LY86",
  "VWF",
  "COL1A2",
  "GAD1",
  "VIP",
  "SST",
  "PVALB",
  "LAMP5",
  "SLC17A8",
  "SLC17A7",
  "CUX2",
  "LHX2",
  "RORB",
  "FEZF2",
  "FOXP2",
  "TLE4",
  "NXPH3",
  "SULF1"
))) +
  ylab("Cell type") +
  coord_flip() +
  rotate_x()

dacc_meta <- dacc@meta.data
saveRDS(dacc_meta, file = file.path(
  merged_dir, "dACC.integrated.batch.analyzed.cell_metadata.labeled.rds"))

5 Call markers

Idents(dacc) <- "cell_type_broad"
markers_dacc <- FindAllMarkers(dacc, only.pos = TRUE, logfc.threshold = 0.25, 
                               max.cells.per.ident = 1000) %>%
    mutate(pct.diff = pct.1 - pct.2)
write_tsv(markers_dacc, file.path(merged_dir, 
                                  "dACC.markers.cell_type_broad.tsv"))

6 Session info

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/libf77blas.so.3.10.3
## LAPACK: /users/guozy/miniconda3/envs/psych/lib/libopenblasp-r0.3.27.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3 viridis_0.6.4      viridisLite_0.4.2  Signac_1.9.0      
##  [5] ggplot2_3.5.1      SeuratObject_5.0.0 Seurat_4.3.0       glue_1.6.2        
##  [9] readr_2.1.4        purrr_1.0.2        dplyr_1.1.2        tidyr_1.3.0       
## [13] here_1.0.1         Cairo_1.6-2       
## 
## loaded via a namespace (and not attached):
##   [1] spam_2.9-1             fastmatch_1.1-4        plyr_1.8.9            
##   [4] igraph_2.0.3           lazyeval_0.2.2         sp_2.1-4              
##   [7] splines_4.2.0          BiocParallel_1.32.6    listenv_0.9.1         
##  [10] scattermore_1.2        GenomeInfoDb_1.34.9    digest_0.6.35         
##  [13] htmltools_0.5.8.1      fansi_1.0.6            magrittr_2.0.3        
##  [16] tensor_1.5             cluster_2.1.6          ROCR_1.0-11           
##  [19] limma_3.54.2           tzdb_0.4.0             globals_0.16.3        
##  [22] Biostrings_2.66.0      matrixStats_1.0.0      vroom_1.6.5           
##  [25] spatstat.sparse_3.0-3  colorspace_2.1-0       rappdirs_0.3.3        
##  [28] ggrepel_0.9.5          xfun_0.44              crayon_1.5.2          
##  [31] RCurl_1.98-1.14        jsonlite_1.8.8         progressr_0.14.0      
##  [34] spatstat.data_3.0-4    survival_3.6-4         zoo_1.8-12            
##  [37] polyclip_1.10-6        gtable_0.3.5           zlibbioc_1.44.0       
##  [40] XVector_0.38.0         leiden_0.4.3.1         future.apply_1.11.2   
##  [43] BiocGenerics_0.44.0    abind_1.4-5            scales_1.3.0          
##  [46] spatstat.random_3.2-3  miniUI_0.1.1.1         Rcpp_1.0.11           
##  [49] xtable_1.8-4           reticulate_1.37.0      bit_4.0.5             
##  [52] dotCall64_1.1-1        stats4_4.2.0           htmlwidgets_1.6.4     
##  [55] httr_1.4.7             ica_1.0-3              farver_2.1.2          
##  [58] pkgconfig_2.0.3        sass_0.4.9             uwot_0.2.2            
##  [61] deldir_2.0-4           utf8_1.2.4             labeling_0.4.3        
##  [64] tidyselect_1.2.1       rlang_1.1.4            reshape2_1.4.4        
##  [67] later_1.3.2            munsell_0.5.1          tools_4.2.0           
##  [70] cachem_1.1.0           cli_3.6.2              generics_0.1.3        
##  [73] ggridges_0.5.6         evaluate_0.24.0        stringr_1.5.1         
##  [76] fastmap_1.2.0          yaml_2.3.8             goftest_1.2-3         
##  [79] bit64_4.0.5            knitr_1.47             fitdistrplus_1.1-11   
##  [82] RANN_2.6.1             pbapply_1.7-2          future_1.33.2         
##  [85] nlme_3.1-164           mime_0.12              RcppRoll_0.3.0        
##  [88] compiler_4.2.0         plotly_4.10.4          png_0.1-8             
##  [91] spatstat.utils_3.0-4   tibble_3.2.1           bslib_0.7.0           
##  [94] stringi_1.8.4          highr_0.11             lattice_0.22-6        
##  [97] Matrix_1.6-1.1         vctrs_0.6.5            pillar_1.9.0          
## [100] lifecycle_1.0.4        spatstat.geom_3.2-9    lmtest_0.9-40         
## [103] jquerylib_0.1.4        RcppAnnoy_0.0.22       data.table_1.14.8     
## [106] cowplot_1.1.3          bitops_1.0-7           irlba_2.3.5.1         
## [109] httpuv_1.6.15          patchwork_1.2.0        GenomicRanges_1.50.2  
## [112] R6_2.5.1               promises_1.3.0         KernSmooth_2.23-20    
## [115] gridExtra_2.3          IRanges_2.32.0         parallelly_1.37.1     
## [118] codetools_0.2-20       MASS_7.3-56            rprojroot_2.0.4       
## [121] withr_3.0.0            sctransform_0.4.0      Rsamtools_2.14.0      
## [124] S4Vectors_0.36.2       GenomeInfoDbData_1.2.9 parallel_4.2.0        
## [127] hms_1.1.3              grid_4.2.0             rmarkdown_2.27        
## [130] Rtsne_0.17             spatstat.explore_3.2-7 shiny_1.8.1.1