exp_id     <- "20231009_multiome_analysis/03-integration" # determines the name of the cache folder
doc_id     <- "05b" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out        <- here::here("outs", exp_id, doc_id, "/"); dir.create(out, recursive = TRUE)
figout     <- here::here("outs", exp_id, doc_id, "figures/")
cache      <- file.path("~/scratch/cache", exp_id, doc_id, "/")

1 Overview

Here, we’ll assemble the final annotations for each dataset. Consulting several resources:

Hodge, R.D., Bakken, T.E., Miller, J.A., Smith, K.A., Barkan, E.R., Graybuck, L.T., Close, J.L., Long, B., Johansen, N., Penn, O., et al. (2019). Conserved cell types with divergent features in human versus mouse cortex. Nature 573, 61–68. 10.1038/s41586-019-1506-7.

Bakken, T.E., Jorstad, N.L., Hu, Q., Lake, B.B., Tian, W., Kalmbach, B.E., Crow, M., Hodge, R.D., Krienen, F.M., Sorensen, S.A., et al. (2021). Comparative cellular analysis of motor cortex in human, marmoset and mouse. Nature 598, 111–119. 10.1038/s41586-021-03465-8.

Franjic, D., Skarica, M., Ma, S., Arellano, J.I., Tebbenkamp, A.T.N., Choi, J., Xu, C., Li, Q., Morozov, Y.M., Andrijevic, D., et al. (2022). Transcriptomic taxonomy and neurogenic trajectories of adult human, macaque, and pig hippocampal and entorhinal cells. Neuron 110, 452–469.e14. 10.1016/j.neuron.2021.10.036.

In general, to finalize cell type annotation, we will subcluster glutamatergic and GABAergic neurons separately.

2 Set up

library(here)
library(tidyr)
library(dplyr)
library(purrr)
library(readr)
library(glue)
library(Seurat)
library(ggplot2)
library(Signac)

source(here("functions_r/scRNAseq.R"))

ggplot2::theme_set(theme_min())

3 Load data

merged_dir <- "/oak/stanford/groups/akundaje/projects/psychencode/outs/20231009_multiome_analysis/03-integration/merged_objects"
dlpfc <- readRDS(file.path(merged_dir, "DLPFC.integrated.batch.analyzed.rds"))

4 Cell type annotation

We manually assigned colors for each broad label, taken from the following studies to approximately match the color scheme used in the Allen Institute papers.

Table 3 in Zhang et al, Nature, 2021 (https://www.nature.com/articles/s41586-021-03705-x)
Table 3 in Yao et al, Cell, 2021 (https://www.sciencedirect.com/science/article/pii/S0092867421005018?via%3Dihub#app2)

Conventions:

predicted.subclass is the per-cell automated annotation from Azimuth
predicted.cell_type_percluster is the majority vote of the per-cell label, within each cluster

# load labels and color scheme
labels <- readr::read_tsv(
  here("exps/20231009_multiome_analysis_v2/03-integration/label_map.tsv")) %>% 
  tibble::rowid_to_column(var = "Order")

## Rows: 60 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Class, Label, Color, Label_harmonized, Label_chrombpnet
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

palette_labels <- labels %>% dplyr::select(Label, Color) %>% tibble::deframe()
palette_labels_harmonized <- labels %>% distinct(Label_harmonized, Color) %>% 
  tibble::deframe()

labels

Idents(dlpfc) <- "seurat_clusters"
p0 <- plot_dr(dlpfc, color_by = "wsnn_batch_res.0.6", point_size = 0.2, 
              label = TRUE, reduction = "harm.wnn.umap_batch",
    label_size = 5, legend = FALSE,
    title = glue("DLPFC, n = {ncol(dlpfc)}")) + xlab("UMAP 1") + ylab("UMAP 2")

Idents(dlpfc) <- "predicted.subclass"
p1 <- plot_dr(dlpfc, color_by = "predicted.cell_type_percluster", 
              point_size = 0.2, label = TRUE, reduction = "harm.wnn.umap_batch", 
              colors = palette_labels, label_size = 5, legend = FALSE, 
              title = glue("DLPFC, n = {ncol(dlpfc)}")) + xlab("UMAP 1") + 
  ylab("UMAP 2")

p2 <- plot_dr(dlpfc, color_by = "predicted.subclass", point_size = 0.2, 
              label = TRUE, reduction = "harm.wnn.umap_batch", 
              colors = palette_labels, label_size = 5, legend = FALSE, 
              title = glue("DLPFC, n = {ncol(dlpfc)}"))

p0 + p1 + p2

dlpfc@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = predicted.cell_type_percluster), position = "fill") +
    scale_fill_manual(values = palette_labels) +
    ggtitle("DLPFC cluster breakdown by human cortex predictions (majority per cluster)")

dlpfc@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = predicted.subclass), position = "fill") +
    scale_fill_manual(values = palette_labels) +
    ggtitle("DLPFC cluster breakdown by human cortex predictions")

dlpfc@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = predicted.cell_type_percluster)) +
    scale_fill_manual(values = palette_labels) +
    ggtitle("DLPFC cluster breakdown by human cortex predictions (majority per cluster)")

dlpfc@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = predicted.subclass)) +
    scale_fill_manual(values = palette_labels) +
    ggtitle("DLPFC cluster breakdown by human cortex predictions")

# add in broader cell class from labels dataframe column
dlpfc@meta.data$predicted.cell_class <- dlpfc@meta.data$predicted.subclass
dlpfc@meta.data$predicted.cell_class <- plyr::mapvalues(
  dlpfc@meta.data$predicted.cell_class, from = labels$Label, to = labels$Class)

## The following `from` values were not present in `x`: COP, Micro, Macro, Myeloid, T, aEndo, PC, vSMC, aSMC, VLMC_VSMC, InN VIP, InN NR2F2, InN SST, InN PVALB, InN LAMP5, InN LHX6, InN MEIS2, ExN, L2-5 IT, L2-6 IT, EC L2, EC L3, EC L56, EC L5, L6 CT and L6b, EC L6, EC L6b, DG GC, DG MC, DG, CA3, CA2, CA1 dorsal, CA1 ventral, CA1-3_and_subiculum, SUB distal, SUB proximal, SUB, CR RELN, Unknown

# get the most common cell class for each cluster
df_cluster_celltype <- dlpfc@meta.data %>% 
  dplyr::select(seurat_clusters, predicted.cell_class) %>% 
  group_by(seurat_clusters, predicted.cell_class) %>%
  summarise(n = n()) %>% 
  top_n(1, n)

## `summarise()` has grouped output by 'seurat_clusters'. You can override using
## the `.groups` argument.

# add it back to the object
dlpfc@meta.data$predicted.cell_class_percluster <- plyr::mapvalues(
  dlpfc@meta.data$seurat_clusters, from = df_cluster_celltype$seurat_clusters, 
  to = df_cluster_celltype$predicted.cell_class)

4.1 Glutamatergic

plot_dr(dlpfc, color_by = "predicted.subclass", point_size = 0.2, label = FALSE, 
        reduction = "harm.wnn.umap_batch", colors = palette_labels, 
        cells = WhichCells(dlpfc, expr = predicted.cell_class == "Glutamatergic"), 
        label_size = 5, legend = FALSE, 
        title = "DLPFC, highlighting glutamatergic cells")

dlpfc_glutamatergic <- subset(dlpfc, cells = WhichCells(
  dlpfc, expr = predicted.cell_class_percluster == "Glutamatergic"))
table(dlpfc_glutamatergic$predicted.cell_class_percluster)

## 
##  Non-neuronal Glutamatergic     GABAergic 
##             0         43028             0

# call subcluster_seurat() function in functions_r/scRNAseq.R
dlpfc_glutamatergic <- subcluster_seurat(dlpfc_glutamatergic, 
                                         key = "Glutamatergic", resolution = 0.2)

## Performing TF-IDF normalization

## Running SVD

## Scaling cell embeddings

cowplot::plot_grid(umap(dlpfc_glutamatergic, color_by = "predicted.subclass", 
                        colors = palette_labels), 
                   umap(dlpfc_glutamatergic, 
                        color_by = "Clusters_subset_Glutamatergic"))

# added by James: bar plot for cell type proportion per cluster
dlpfc_glutamatergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.subclass), position = "fill") +
  scale_fill_manual(values = palette_labels) +
  ylab('proportion')+
  ggtitle("dlpfc cluster breakdown for glutamatergic")

# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)

# get the most common cell class for each cluster
df_glutamatergic <- dlpfc_glutamatergic@meta.data %>% 
  dplyr::select(Clusters_subset_Glutamatergic, predicted.subclass) %>% 
  group_by(Clusters_subset_Glutamatergic, predicted.subclass) %>%
  dplyr::summarise(n_per = dplyr::n()) %>% 
  top_n(1, n_per) %>%
  # add suffixes to clusters with the same majority vote, only if there's more than one cluster
  group_by(predicted.subclass) %>%
  dplyr::mutate(n_per_label = dplyr::n()) %>% 
  dplyr::mutate(new_label = case_when(
    n_per_label > 1 ~ paste0(predicted.subclass, "_", row_number()),
    TRUE ~ predicted.subclass
  ))

## `summarise()` has grouped output by 'Clusters_subset_Glutamatergic'. You can
## override using the `.groups` argument.

df_glutamatergic

dlpfc_glutamatergic@meta.data$cell_type_final <- plyr::mapvalues(
  dlpfc_glutamatergic@meta.data$Clusters_subset_Glutamatergic,
  from = df_glutamatergic$Clusters_subset_Glutamatergic,
  to = df_glutamatergic$new_label)

Idents(dlpfc_glutamatergic) <- "cell_type_final"

plot_dr(dlpfc_glutamatergic, color_by = "predicted.subclass", point_size = 0.2, 
        label = TRUE, reduction = "harm.wnn.umap_batch", colors = palette_labels, 
        label_size = 5, legend = TRUE, 
        title = glue("DLPFC glutamatergic neurons, final cell type labels, \nn = {ncol(dlpfc_glutamatergic)}"))

saveRDS(dlpfc_glutamatergic, file = file.path(
  merged_dir, "DLPFC.subset_glutamatergic.seurat.Rds"))

# call markers on subsetted object
markers_glutamatergic <- FindAllMarkers(dlpfc_glutamatergic, only.pos = TRUE, 
                                        min.pct = 0.25, logfc.threshold = 0.25)

## Calculating cluster L2/3 IT_1

## Calculating cluster L5 IT_1

## Calculating cluster L2/3 IT_2

## Calculating cluster L6 IT_1

## Calculating cluster L5 IT_2

## Calculating cluster L2/3 IT_3

## Calculating cluster L6b

## Calculating cluster L6 CT_1

## Calculating cluster L2/3 IT_4

## Calculating cluster L2/3 IT_5

## Calculating cluster L5/6 NP

## Calculating cluster L5 IT_3

## Calculating cluster L6 IT Car3

## Calculating cluster L5 IT_4

## Calculating cluster L6 IT_2

## Calculating cluster L5 IT_5

## Calculating cluster L5 ET

## Calculating cluster L6 CT_2

write_tsv(markers_glutamatergic, file.path(
  merged_dir, "DLPFC.subset_glutamatergic.markers.tsv"))

top5 <- markers_glutamatergic %>%
    group_by(cluster) %>%
    slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = dlpfc_glutamatergic, downsample = 100), 
          features = top5$gene) + viridis::scale_fill_viridis()

## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

dlpfc_glutamatergic@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = condition), position = "fill")

dlpfc_glutamatergic@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = sample), position = "fill")

4.2 GABAergic

plot_dr(dlpfc, color_by = "predicted.subclass", point_size = 0.2, label = FALSE, 
        reduction = "harm.wnn.umap_batch", colors = palette_labels,
    cells = WhichCells(dlpfc, expr = predicted.cell_class == "GABAergic"),
    label_size = 5, legend = FALSE,
    title = "DLPFC, highlighting GABAergic cells")

dlpfc_gabaergic <- subset(dlpfc, cells = WhichCells(
  dlpfc, expr = predicted.cell_class_percluster == "GABAergic"))
table(dlpfc_gabaergic$predicted.cell_class_percluster)

## 
##  Non-neuronal Glutamatergic     GABAergic 
##             0             0         19066

dlpfc_gabaergic <- subcluster_seurat(dlpfc_gabaergic, key = "GABAergic")

## Performing TF-IDF normalization

## Running SVD

## Scaling cell embeddings

cowplot::plot_grid(umap(dlpfc_gabaergic, color_by = "predicted.subclass", 
                        colors = palette_labels), umap(
                          dlpfc_gabaergic, 
                          color_by = "Clusters_subset_GABAergic"))

# added by James: bar plot for cell type percentage per cluster
dlpfc_gabaergic@meta.data %>%
  ggplot(aes(x = seurat_clusters)) +
  geom_bar(aes(fill = predicted.subclass), position = "fill") +
  scale_fill_manual(values = palette_labels) +
  ylab('proportion')+
  ggtitle("dlpfc cluster breakdown for gabaergic")

# for each subcluster, first get the majority vote. Then, if any clusters have
# the same majority, add a numeric suffix (1, 2, etc)

# get the most common cell class for each cluster
df_gabaergic <- dlpfc_gabaergic@meta.data %>% 
  dplyr::select(Clusters_subset_GABAergic, predicted.subclass) %>% 
  group_by(Clusters_subset_GABAergic, predicted.subclass) %>%
  dplyr::summarise(n_per = dplyr::n()) %>% 
  top_n(1, n_per) %>%
  # add suffixes to clusters with the same majority vote, only if there's more than one cluster
  group_by(predicted.subclass) %>%
  dplyr::mutate(n_per_label = dplyr::n()) %>% 
  dplyr::mutate(new_label = case_when(
    n_per_label > 1 ~ paste0(predicted.subclass, "_", row_number()),
    TRUE ~ predicted.subclass
  ))

## `summarise()` has grouped output by 'Clusters_subset_GABAergic'. You can
## override using the `.groups` argument.

df_gabaergic

dlpfc_gabaergic@meta.data$cell_type_final <- plyr::mapvalues(
  dlpfc_gabaergic@meta.data$Clusters_subset_GABAergic,
  from = df_gabaergic$Clusters_subset_GABAergic,
  to = df_gabaergic$new_label)

Idents(dlpfc_gabaergic) <- "cell_type_final"

saveRDS(dlpfc_gabaergic, file = file.path(merged_dir, 
                                          "DLPFC.subset_gabaergic.seurat.Rds"))

plot_dr(dlpfc_gabaergic, color_by = "predicted.subclass", point_size = 0.2, 
        label = TRUE, reduction = "harm.wnn.umap_batch", colors = palette_labels,
    label_size = 5, legend = TRUE,
    title = glue("DLPFC GABAergic neurons, final cell type labels, \nn = {ncol(dlpfc_gabaergic)}"))

# call markers on subsetted object
markers_gabaergic <- FindAllMarkers(dlpfc_gabaergic, only.pos = TRUE, 
                                    min.pct = 0.25, logfc.threshold = 0.25)

## Calculating cluster Pvalb_1

## Calculating cluster Sst_1

## Calculating cluster Vip_1

## Calculating cluster Vip_2

## Calculating cluster Sst_2

## Calculating cluster Lamp5_1

## Calculating cluster Sncg_1

## Calculating cluster Sst_3

## Calculating cluster Pvalb_2

## Calculating cluster Lamp5_2

## Calculating cluster Sncg_2

## Calculating cluster Sst_4

## Calculating cluster Sst_5

## Calculating cluster Sst Chodl

## Calculating cluster Pvalb_3

write_tsv(markers_gabaergic, file.path(merged_dir, 
                                       "DLPFC.subset_gabaergic.markers.tsv"))

top5 <- markers_gabaergic %>%
    group_by(cluster) %>%
    slice_max(n = 5, order_by = avg_log2FC)
DoHeatmap(subset(x = dlpfc_gabaergic, downsample = 100), 
          features = top5$gene) + viridis::scale_fill_viridis()

## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

dlpfc_gabaergic@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = condition), position = "fill")

dlpfc_gabaergic@meta.data %>%
    ggplot(aes(x = seurat_clusters)) +
    geom_bar(aes(fill = sample), position = "fill")

4.3 Assemble final annotations

Specify certain subclusters to label as “unknown” due to lack of clear markers or mixed annotations.

# define clusters to drop, then convert the column back to character; and replace those labels
Idents(dlpfc_glutamatergic) <- "seurat_clusters"
clusters_unk_glut <- c("3", "4", "14")
cells_unk_glut <- WhichCells(dlpfc_glutamatergic, idents = clusters_unk_glut)
dlpfc_glutamatergic$cell_type_final <- as.character(dlpfc_glutamatergic$cell_type_final)
dlpfc_glutamatergic$cell_type_final[cells_unk_glut] <- "Unknown"

Idents(dlpfc_gabaergic) <- "seurat_clusters"
clusters_unk_gaba <- c("13", "14", "15")
cells_unk_gaba <- WhichCells(dlpfc_gabaergic, idents = clusters_unk_gaba)
dlpfc_gabaergic$cell_type_final <- as.character(dlpfc_gabaergic$cell_type_final)
dlpfc_gabaergic$cell_type_final[cells_unk_gaba] <- "Unknown"

# as a default, set the final cell type to the majority cell type per cluster
dlpfc$cell_type_final <- as.character(dlpfc$predicted.cell_type_percluster)

dlpfc@meta.data$Clusters_all <- dlpfc@meta.data$seurat_clusters
dlpfc@meta.data$Clusters_subset_glutamatergic <- NA
dlpfc@meta.data$Clusters_subset_GABAergic <- NA

# add glutamatergic subclusters into main object
cells_glut <- Cells(dlpfc_glutamatergic)
dlpfc@meta.data[cells_glut, ]$Clusters_subset_glutamatergic <- dlpfc_glutamatergic@meta.data$Clusters_subset_Glutamatergic
dlpfc@meta.data[cells_glut, ]$cell_type_final <- as.character(
  dlpfc_glutamatergic@meta.data$cell_type_final)

# add gabaergic subclusters into main object
cells_gab <- Cells(dlpfc_gabaergic)
dlpfc@meta.data[cells_gab, ]$Clusters_subset_GABAergic <- dlpfc_gabaergic@meta.data$Clusters_subset_GABAergic
dlpfc@meta.data[cells_gab, ]$cell_type_final <- as.character(
  dlpfc_gabaergic@meta.data$cell_type_final)

# relabel with harmonized cell type labels
label_map_new <- data.frame(Label_granular = unique(dlpfc@meta.data$cell_type_final)) %>% 
  mutate(Label_granular_old = Label_granular) %>% 
  separate(Label_granular, into = c("Label_wo_suffix", "suffix"), sep = "_") %>% 
  left_join(labels, by = c("Label_wo_suffix" = "Label")) %>% 
  mutate(Label_granular = case_when(
    !is.na(suffix) ~ paste0(Label_harmonized, "_", suffix),
    TRUE ~ Label_harmonized
  )) %>% 
  arrange(Order)

dlpfc@meta.data$cell_type_granular <- plyr::mapvalues(dlpfc@meta.data$cell_type_final,
                                                  from = label_map_new$Label_granular_old,
                                                  to = label_map_new$Label_granular)

dlpfc@meta.data$cell_type_broad <- plyr::mapvalues(dlpfc@meta.data$cell_type_final,
                                                  from = label_map_new$Label_granular_old,
                                                  to = label_map_new$Label_harmonized)

dlpfc@meta.data$cell_type_final <- NULL

# put the labels in the cell type order from our label map, to get a rational order
dlpfc_order <- label_map_new %>%
  dplyr::filter(Label_granular %in% dlpfc$cell_type_granular) %>%
  pull(Label_granular) %>% base::unique()
dlpfc_order

##  [1] "OPC"                     "Oligo"                  
##  [3] "Astro"                   "Microglia PVM"          
##  [5] "Endo"                    "Vascular leptomeningeal"
##  [7] "VIP_1"                   "VIP_2"                  
##  [9] "SST_4"                   "SST_2"                  
## [11] "SST_1"                   "SST_3"                  
## [13] "PVALB_1"                 "PVALB_2"                
## [15] "LAMP5_2"                 "LAMP5_1"                
## [17] "SNCG_2"                  "SNCG_1"                 
## [19] "L23 IT_1"                "L23 IT_3"               
## [21] "L23 IT_4"                "L23 IT_5"               
## [23] "L5 IT_1"                 "L5 IT_3"                
## [25] "L5 IT_2"                 "L5 IT_5"                
## [27] "L5 ET"                   "L56 NP"                 
## [29] "L6b"                     "L6 IT_2"                
## [31] "L6 IT CAR3"              "L6 CT_1"                
## [33] "L6 CT_2"                 "Unknown"

dlpfc_order_broad <- labels %>%
  dplyr::filter(Label_harmonized %in% unique(dlpfc$cell_type_broad)) %>%
  pull(Label_harmonized) %>%
  base::unique()
dlpfc_order_broad

##  [1] "OPC"                     "Oligo"                  
##  [3] "Astro"                   "Microglia PVM"          
##  [5] "Endo"                    "Vascular leptomeningeal"
##  [7] "VIP"                     "SST"                    
##  [9] "PVALB"                   "LAMP5"                  
## [11] "SNCG"                    "L23 IT"                 
## [13] "L5 IT"                   "L5 ET"                  
## [15] "L56 NP"                  "L6b"                    
## [17] "L6 IT"                   "L6 IT CAR3"             
## [19] "L6 CT"                   "Unknown"

# put the labels in the cell type order from our label map, to get a rational order
dlpfc$cell_type_broad <- factor(dlpfc$cell_type_broad, levels = dlpfc_order_broad)
dlpfc$cell_type_granular <- factor(dlpfc$cell_type_granular, levels = dlpfc_order)
Idents(dlpfc) <- "cell_type_granular"

plot_dr(dlpfc, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
        reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
    label_size = 5, legend = TRUE,
    title = glue("DLPFC AD, final cell type labels, n = {ncol(dlpfc)}"))

Idents(dlpfc) <- "cell_type_broad"
plot_dr(dlpfc, color_by = "cell_type_broad", point_size = 0.2, label = TRUE,
        reduction = "harm.wnn.umap_batch", colors = palette_labels_harmonized,
        label_size = 5, legend = TRUE,
        title = glue("DLPFC, final cell type labels, broad, n = {ncol(dlpfc)}"))

Plot cluster numbers:

dlpfc@meta.data %>% 
  group_by(cell_type_broad, cell_type_granular) %>% 
  count() %>% 
  ggplot(aes(x = cell_type_granular, y = n)) +
  geom_bar(aes(fill = cell_type_broad), stat = "identity") +
  geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
  scale_fill_manual(values = palette_labels_harmonized) +
  scale_color_manual(values = palette_labels_harmonized) +
  rotate_x() +
  ggtitle("Number of cells per granular cluster")

dlpfc@meta.data %>% 
  group_by(cell_type_broad) %>% 
  count() %>% 
  ggplot(aes(x = cell_type_broad, y = log10(n))) +
  geom_bar(aes(fill = cell_type_broad), stat = "identity") +
  geom_text(aes(label = n, color = cell_type_broad), hjust = -0.1, angle = 90) +
  scale_fill_manual(values = palette_labels_harmonized) +
  scale_color_manual(values = palette_labels_harmonized) +
  rotate_x() +
  ggtitle("Number of cells per broad cluster")

dlpfc@meta.data %>% 
  group_by(cell_type_broad) %>% 
  summarize(sum_ncount = sum(nCount_ATAC)) %>% 
  ggplot(aes(x = cell_type_broad, y = log10(sum_ncount))) +
  geom_bar(aes(fill = cell_type_broad), stat = "identity") +
  geom_text(aes(label = sum_ncount, color = cell_type_broad), hjust = -0.1, 
            angle = 90) +
  scale_fill_manual(values = palette_labels_harmonized) +
  scale_color_manual(values = palette_labels_harmonized) +
  rotate_x() +
  ggtitle("Number of fragments in peaks, per broad cluster") +
  coord_cartesian(ylim = c(0, 10))

DotPlot(dlpfc, assay = "RNA", features = rev(c(
    "PDGFRA",
    "PLP1",
    "AQP4",
    "LY86",
    "VWF",
    "GAD1",
    "VIP",
    "SST",
    "PVALB",
    "LAMP5",
    "SLC17A8",
    "SLC17A7",
    "CUX2",
    # "FREM3",
    "RORB",
    "FEZF2",
    "FOXP2",
    "NXPH3",
    "SULF1"
))) +
    ylab("Cell type") +
    coord_flip() +
    rotate_x()

DotPlot(dlpfc, assay = "RNA", group.by = "cell_type_broad", features = rev(c(
  "PDGFRA",
  "PLP1",
  "AQP4",
  "LY86",
  "VWF",
  "COL1A2",
  "GAD1",
  "VIP",
  "SST",
  "PVALB",
  "LAMP5",
  "SLC17A8",
  "SLC17A7",
  "CUX2",
  "LHX2",
  "RORB",
  "FEZF2",
  "FOXP2",
  "TLE4",
  "NXPH3",
  "SULF1"
))) +
  ylab("Cell type") +
  coord_flip() +
  rotate_x()

# we can reload labels for later analysis.
dlpfc_meta <- dlpfc@meta.data
saveRDS(dlpfc_meta, file = file.path(
  merged_dir, "DLPFC.integrated.batch.analyzed.cell_metadata.labeled.rds"))

5 Call markers

Idents(dlpfc) <- "cell_type_broad"
markers_dlpfc <- FindAllMarkers(dlpfc, only.pos = TRUE, logfc.threshold = 0.25, 
                                max.cells.per.ident = 1000) %>%
        mutate(pct.diff = pct.1 - pct.2)

## Calculating cluster OPC

## Calculating cluster Oligo

## Calculating cluster Astro

## Calculating cluster Microglia PVM

## Calculating cluster Endo

## Calculating cluster Vascular leptomeningeal

## Calculating cluster VIP

## Calculating cluster SST

## Calculating cluster PVALB

## Calculating cluster LAMP5

## Calculating cluster SNCG

## Calculating cluster L23 IT

## Calculating cluster L5 IT

## Calculating cluster L5 ET

## Calculating cluster L56 NP

## Calculating cluster L6b

## Calculating cluster L6 IT

## Calculating cluster L6 IT CAR3

## Calculating cluster L6 CT

## Calculating cluster Unknown

write_tsv(markers_dlpfc, file.path(merged_dir, 
                                   "DLPFC.markers.cell_type_broad.tsv"))

6 Session info

sessionInfo()

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/libf77blas.so.3.10.3
## LAPACK: /users/guozy/miniconda3/envs/psych/lib/libopenblasp-r0.3.27.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3 viridis_0.6.4      viridisLite_0.4.2  Signac_1.9.0      
##  [5] ggplot2_3.5.1      SeuratObject_5.0.0 Seurat_4.3.0       glue_1.6.2        
##  [9] readr_2.1.4        purrr_1.0.2        dplyr_1.1.2        tidyr_1.3.0       
## [13] here_1.0.1         Cairo_1.6-2       
## 
## loaded via a namespace (and not attached):
##   [1] spam_2.9-1             fastmatch_1.1-4        plyr_1.8.9            
##   [4] igraph_2.0.3           lazyeval_0.2.2         sp_2.1-4              
##   [7] splines_4.2.0          BiocParallel_1.32.6    listenv_0.9.1         
##  [10] scattermore_1.2        GenomeInfoDb_1.34.9    digest_0.6.35         
##  [13] htmltools_0.5.8.1      fansi_1.0.6            magrittr_2.0.3        
##  [16] tensor_1.5             cluster_2.1.6          ROCR_1.0-11           
##  [19] limma_3.54.2           tzdb_0.4.0             globals_0.16.3        
##  [22] Biostrings_2.66.0      matrixStats_1.0.0      vroom_1.6.5           
##  [25] spatstat.sparse_3.0-3  colorspace_2.1-0       rappdirs_0.3.3        
##  [28] ggrepel_0.9.5          xfun_0.44              crayon_1.5.2          
##  [31] RCurl_1.98-1.14        jsonlite_1.8.8         progressr_0.14.0      
##  [34] spatstat.data_3.0-4    survival_3.6-4         zoo_1.8-12            
##  [37] polyclip_1.10-6        gtable_0.3.5           zlibbioc_1.44.0       
##  [40] XVector_0.38.0         leiden_0.4.3.1         future.apply_1.11.2   
##  [43] BiocGenerics_0.44.0    abind_1.4-5            scales_1.3.0          
##  [46] spatstat.random_3.2-3  miniUI_0.1.1.1         Rcpp_1.0.11           
##  [49] xtable_1.8-4           reticulate_1.37.0      bit_4.0.5             
##  [52] dotCall64_1.1-1        stats4_4.2.0           htmlwidgets_1.6.4     
##  [55] httr_1.4.7             ica_1.0-3              farver_2.1.2          
##  [58] pkgconfig_2.0.3        sass_0.4.9             uwot_0.2.2            
##  [61] deldir_2.0-4           utf8_1.2.4             labeling_0.4.3        
##  [64] tidyselect_1.2.1       rlang_1.1.4            reshape2_1.4.4        
##  [67] later_1.3.2            munsell_0.5.1          tools_4.2.0           
##  [70] cachem_1.1.0           cli_3.6.2              generics_0.1.3        
##  [73] ggridges_0.5.6         evaluate_0.24.0        stringr_1.5.1         
##  [76] fastmap_1.2.0          yaml_2.3.8             goftest_1.2-3         
##  [79] bit64_4.0.5            knitr_1.47             fitdistrplus_1.1-11   
##  [82] RANN_2.6.1             pbapply_1.7-2          future_1.33.2         
##  [85] nlme_3.1-164           mime_0.12              RcppRoll_0.3.0        
##  [88] compiler_4.2.0         plotly_4.10.4          png_0.1-8             
##  [91] spatstat.utils_3.0-4   tibble_3.2.1           bslib_0.7.0           
##  [94] stringi_1.8.4          highr_0.11             lattice_0.22-6        
##  [97] Matrix_1.6-1.1         vctrs_0.6.5            pillar_1.9.0          
## [100] lifecycle_1.0.4        spatstat.geom_3.2-9    lmtest_0.9-40         
## [103] jquerylib_0.1.4        RcppAnnoy_0.0.22       data.table_1.14.8     
## [106] cowplot_1.1.3          bitops_1.0-7           irlba_2.3.5.1         
## [109] httpuv_1.6.15          patchwork_1.2.0        GenomicRanges_1.50.2  
## [112] R6_2.5.1               promises_1.3.0         KernSmooth_2.23-20    
## [115] gridExtra_2.3          IRanges_2.32.0         parallelly_1.37.1     
## [118] codetools_0.2-20       MASS_7.3-56            rprojroot_2.0.4       
## [121] withr_3.0.0            sctransform_0.4.0      Rsamtools_2.14.0      
## [124] S4Vectors_0.36.2       GenomeInfoDbData_1.2.9 parallel_4.2.0        
## [127] hms_1.1.3              grid_4.2.0             rmarkdown_2.27        
## [130] Rtsne_0.17             spatstat.explore_3.2-7 shiny_1.8.1.1

Subclustering DLPFC

11 July, 2024