Added by James: This Rmd file is adapted from
04-overview.ipynb
, but uses cell type annotations after
subclustering.
library(tidyr)
library(dplyr)
library(purrr)
library(readr)
library(glue)
library(ggplot2)
library(Signac)
library(Seurat)
source("/users/guozy/psychencode/functions_r/scRNAseq.R")
theme_set(theme_min())
# load labels and color scheme
labels <- readr::read_tsv("label_map.tsv") %>%
tibble::rowid_to_column(var = "Order")
## Rows: 60 Columns: 5
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Class, Label, Color, Label_harmonized, Label_chrombpnet
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
palette_labels <- labels %>% dplyr::select(Label, Color) %>% tibble::deframe()
palette_labels_harmonized <- labels %>% distinct(Label_harmonized, Color) %>%
tibble::deframe()
palette_condition <- c("Control" = "gray70",
"Bipolar" = "darkolivegreen",
"Schizophrenia" = "darkorchid4")
labels
merged_dir <- "/oak/stanford/groups/akundaje/projects/psychencode/outs/20231009_multiome_analysis/03-integration/merged_objects"
# added by James: use cell type annotations from after the subclustering
dacc <- readRDS(file.path(merged_dir, "dACC.integrated.batch.analyzed.cell_metadata.labeled.rds"))
dlpfc <- readRDS(file.path(merged_dir, "DLPFC.integrated.batch.analyzed.cell_metadata.labeled.rds"))
hippocampus <- readRDS(file.path(merged_dir, "Hippocampus.integrated.batch.analyzed.cell_metadata.labeled.rds"))
Here let’s perform a global overview of QC.
qc_table_file <- readr::read_tsv('/oak/stanford/groups/akundaje/projects/psychencode/outs/20231009_multiome_analysis/metadata/multiome.qc.tsv', show_col_types=FALSE)
length(unique(qc_table_file$subject_id))
## [1] 40
head(qc_table_file)
all_metadata <- bind_rows(dacc, dlpfc, hippocampus)
# deal with a naming issue in region columns in seurat objects
all_metadata$region <- all_metadata$region.x
all_metadata$region.x <- NULL
all_metadata$region.y <- NULL
all_metadata <- all_metadata %>%
arrange(region) %>%
mutate(sample = factor(sample, levels = unique(.$sample)))
colnames(all_metadata) |>
sort()
## [1] "age"
## [2] "all_doublets"
## [3] "atac_barcode"
## [4] "atac_chimeric_reads"
## [5] "atac_doublets"
## [6] "atac_dup_reads"
## [7] "atac_fragments"
## [8] "atac_lowmapq"
## [9] "atac_mitochondrial_reads"
## [10] "atac_peak_region_cutsites"
## [11] "atac_peak_region_fragments"
## [12] "atac_raw_reads"
## [13] "atac_TSS_fragments"
## [14] "atac_unmapped_reads"
## [15] "ATAC.weight"
## [16] "barcode"
## [17] "batch"
## [18] "cell_type_broad"
## [19] "cell_type_granular"
## [20] "Clusters_all"
## [21] "Clusters_subset_GABAergic"
## [22] "Clusters_subset_glutamatergic"
## [23] "condition"
## [24] "DF.classifications_0.25_0.01_112"
## [25] "DF.classifications_0.25_0.01_113"
## [26] "DF.classifications_0.25_0.01_114"
## [27] "DF.classifications_0.25_0.01_115"
## [28] "DF.classifications_0.25_0.01_117"
## [29] "DF.classifications_0.25_0.01_118"
## [30] "DF.classifications_0.25_0.01_12"
## [31] "DF.classifications_0.25_0.01_120"
## [32] "DF.classifications_0.25_0.01_122"
## [33] "DF.classifications_0.25_0.01_123"
## [34] "DF.classifications_0.25_0.01_126"
## [35] "DF.classifications_0.25_0.01_127"
## [36] "DF.classifications_0.25_0.01_134"
## [37] "DF.classifications_0.25_0.01_135"
## [38] "DF.classifications_0.25_0.01_138"
## [39] "DF.classifications_0.25_0.01_140"
## [40] "DF.classifications_0.25_0.01_141"
## [41] "DF.classifications_0.25_0.01_142"
## [42] "DF.classifications_0.25_0.01_144"
## [43] "DF.classifications_0.25_0.01_182"
## [44] "DF.classifications_0.25_0.01_183"
## [45] "DF.classifications_0.25_0.01_184"
## [46] "DF.classifications_0.25_0.01_185"
## [47] "DF.classifications_0.25_0.01_186"
## [48] "DF.classifications_0.25_0.01_188"
## [49] "DF.classifications_0.25_0.01_193"
## [50] "DF.classifications_0.25_0.01_195"
## [51] "DF.classifications_0.25_0.01_196"
## [52] "DF.classifications_0.25_0.01_198"
## [53] "DF.classifications_0.25_0.01_199"
## [54] "DF.classifications_0.25_0.01_201"
## [55] "DF.classifications_0.25_0.01_203"
## [56] "DF.classifications_0.25_0.01_209"
## [57] "DF.classifications_0.25_0.01_213"
## [58] "DF.classifications_0.25_0.01_215"
## [59] "DF.classifications_0.25_0.01_268"
## [60] "DF.classifications_0.25_0.01_269"
## [61] "DF.classifications_0.25_0.01_272"
## [62] "DF.classifications_0.25_0.01_280"
## [63] "DF.classifications_0.25_0.01_283"
## [64] "DF.classifications_0.25_0.01_285"
## [65] "DF.classifications_0.25_0.01_287"
## [66] "DF.classifications_0.25_0.01_289"
## [67] "DF.classifications_0.25_0.01_293"
## [68] "DF.classifications_0.25_0.01_295"
## [69] "DF.classifications_0.25_0.01_30"
## [70] "DF.classifications_0.25_0.01_31"
## [71] "DF.classifications_0.25_0.01_35"
## [72] "DF.classifications_0.25_0.01_36"
## [73] "DF.classifications_0.25_0.01_37"
## [74] "DF.classifications_0.25_0.01_370"
## [75] "DF.classifications_0.25_0.01_374"
## [76] "DF.classifications_0.25_0.01_379"
## [77] "DF.classifications_0.25_0.01_39"
## [78] "DF.classifications_0.25_0.01_403"
## [79] "DF.classifications_0.25_0.01_408"
## [80] "DF.classifications_0.25_0.01_409"
## [81] "DF.classifications_0.25_0.01_410"
## [82] "DF.classifications_0.25_0.01_413"
## [83] "DF.classifications_0.25_0.01_493"
## [84] "DF.classifications_0.25_0.01_500"
## [85] "DF.classifications_0.25_0.01_503"
## [86] "DF.classifications_0.25_0.01_6"
## [87] "DF.classifications_0.25_0.01_60"
## [88] "DF.classifications_0.25_0.01_61"
## [89] "DF.classifications_0.25_0.01_62"
## [90] "DF.classifications_0.25_0.01_64"
## [91] "DF.classifications_0.25_0.01_65"
## [92] "DF.classifications_0.25_0.01_652"
## [93] "DF.classifications_0.25_0.01_68"
## [94] "DF.classifications_0.25_0.01_69"
## [95] "DF.classifications_0.25_0.01_71"
## [96] "DF.classifications_0.25_0.01_73"
## [97] "DF.classifications_0.25_0.01_75"
## [98] "DF.classifications_0.25_0.01_76"
## [99] "DF.classifications_0.25_0.01_77"
## [100] "DF.classifications_0.25_0.01_8"
## [101] "DF.classifications_0.25_0.01_83"
## [102] "donor"
## [103] "excluded_reason"
## [104] "gex_barcode"
## [105] "gex_conf_exonic_antisense_reads"
## [106] "gex_conf_exonic_dup_reads"
## [107] "gex_conf_exonic_reads"
## [108] "gex_conf_exonic_unique_reads"
## [109] "gex_conf_intergenic_reads"
## [110] "gex_conf_intronic_antisense_reads"
## [111] "gex_conf_intronic_dup_reads"
## [112] "gex_conf_intronic_reads"
## [113] "gex_conf_intronic_unique_reads"
## [114] "gex_conf_txomic_unique_reads"
## [115] "gex_exonic_umis"
## [116] "gex_genes_count"
## [117] "gex_intronic_umis"
## [118] "gex_mapped_reads"
## [119] "gex_raw_reads"
## [120] "gex_umis_count"
## [121] "is_cell"
## [122] "mapping.score"
## [123] "nCount_ATAC"
## [124] "nCount_refAssay"
## [125] "nCount_RNA"
## [126] "nCount_SCT"
## [127] "nFeature_ATAC"
## [128] "nFeature_refAssay"
## [129] "nFeature_RNA"
## [130] "nFeature_SCT"
## [131] "nucleosome_percentile"
## [132] "nucleosome_signal"
## [133] "orig.ident"
## [134] "pANN_0.25_0.01_112"
## [135] "pANN_0.25_0.01_113"
## [136] "pANN_0.25_0.01_114"
## [137] "pANN_0.25_0.01_115"
## [138] "pANN_0.25_0.01_117"
## [139] "pANN_0.25_0.01_118"
## [140] "pANN_0.25_0.01_12"
## [141] "pANN_0.25_0.01_120"
## [142] "pANN_0.25_0.01_122"
## [143] "pANN_0.25_0.01_123"
## [144] "pANN_0.25_0.01_126"
## [145] "pANN_0.25_0.01_127"
## [146] "pANN_0.25_0.01_134"
## [147] "pANN_0.25_0.01_135"
## [148] "pANN_0.25_0.01_138"
## [149] "pANN_0.25_0.01_140"
## [150] "pANN_0.25_0.01_141"
## [151] "pANN_0.25_0.01_142"
## [152] "pANN_0.25_0.01_144"
## [153] "pANN_0.25_0.01_182"
## [154] "pANN_0.25_0.01_183"
## [155] "pANN_0.25_0.01_184"
## [156] "pANN_0.25_0.01_185"
## [157] "pANN_0.25_0.01_186"
## [158] "pANN_0.25_0.01_188"
## [159] "pANN_0.25_0.01_193"
## [160] "pANN_0.25_0.01_195"
## [161] "pANN_0.25_0.01_196"
## [162] "pANN_0.25_0.01_198"
## [163] "pANN_0.25_0.01_199"
## [164] "pANN_0.25_0.01_201"
## [165] "pANN_0.25_0.01_203"
## [166] "pANN_0.25_0.01_209"
## [167] "pANN_0.25_0.01_213"
## [168] "pANN_0.25_0.01_215"
## [169] "pANN_0.25_0.01_268"
## [170] "pANN_0.25_0.01_269"
## [171] "pANN_0.25_0.01_272"
## [172] "pANN_0.25_0.01_280"
## [173] "pANN_0.25_0.01_283"
## [174] "pANN_0.25_0.01_285"
## [175] "pANN_0.25_0.01_287"
## [176] "pANN_0.25_0.01_289"
## [177] "pANN_0.25_0.01_293"
## [178] "pANN_0.25_0.01_295"
## [179] "pANN_0.25_0.01_30"
## [180] "pANN_0.25_0.01_31"
## [181] "pANN_0.25_0.01_35"
## [182] "pANN_0.25_0.01_36"
## [183] "pANN_0.25_0.01_37"
## [184] "pANN_0.25_0.01_370"
## [185] "pANN_0.25_0.01_374"
## [186] "pANN_0.25_0.01_379"
## [187] "pANN_0.25_0.01_39"
## [188] "pANN_0.25_0.01_403"
## [189] "pANN_0.25_0.01_408"
## [190] "pANN_0.25_0.01_409"
## [191] "pANN_0.25_0.01_410"
## [192] "pANN_0.25_0.01_413"
## [193] "pANN_0.25_0.01_493"
## [194] "pANN_0.25_0.01_500"
## [195] "pANN_0.25_0.01_503"
## [196] "pANN_0.25_0.01_6"
## [197] "pANN_0.25_0.01_60"
## [198] "pANN_0.25_0.01_61"
## [199] "pANN_0.25_0.01_62"
## [200] "pANN_0.25_0.01_64"
## [201] "pANN_0.25_0.01_65"
## [202] "pANN_0.25_0.01_652"
## [203] "pANN_0.25_0.01_68"
## [204] "pANN_0.25_0.01_69"
## [205] "pANN_0.25_0.01_71"
## [206] "pANN_0.25_0.01_73"
## [207] "pANN_0.25_0.01_75"
## [208] "pANN_0.25_0.01_76"
## [209] "pANN_0.25_0.01_77"
## [210] "pANN_0.25_0.01_8"
## [211] "pANN_0.25_0.01_83"
## [212] "pct_frags_in_peaks"
## [213] "percent.mt"
## [214] "predicted.cell_class"
## [215] "predicted.cell_class_percluster"
## [216] "predicted.cell_type_percluster"
## [217] "predicted.class"
## [218] "predicted.class__humancortexref"
## [219] "predicted.class.score"
## [220] "predicted.class.score__humancortexref"
## [221] "predicted.cluster"
## [222] "predicted.cluster__humancortexref"
## [223] "predicted.cluster_broad"
## [224] "predicted.cluster_broad.score"
## [225] "predicted.cluster.score"
## [226] "predicted.cluster.score__humancortexref"
## [227] "predicted.cross_species_cluster"
## [228] "predicted.cross_species_cluster__humancortexref"
## [229] "predicted.cross_species_cluster.score"
## [230] "predicted.cross_species_cluster.score__humancortexref"
## [231] "predicted.subclass"
## [232] "predicted.subclass__humancortexref"
## [233] "predicted.subclass.score"
## [234] "predicted.subclass.score__humancortexref"
## [235] "race"
## [236] "region"
## [237] "rna_doublets"
## [238] "sample"
## [239] "SCT_snn_res.0.8"
## [240] "SCT.weight"
## [241] "seurat_clusters"
## [242] "sex"
## [243] "source"
## [244] "TSS.enrichment"
## [245] "TSS.percentile"
## [246] "wsnn_batch_res.0.6"
## [247] "wsnn_res.0.6"
# number of cells
all_metadata %>%
ggplot(aes(x = sample)) +
# geom_violin(scale = "width", aes(fill = region)) +
geom_bar(aes(fill = region)) +
coord_cartesian(ylim = c(0, 7500)) +
ggtitle("# cells per sample") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 15, angle = 90, hjust = 1, vjust = 0.5),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
all_metadata %>%
ggplot(aes(x = sample, y = nFeature_RNA)) +
# geom_violin(scale = "width", aes(fill = region)) +
geom_boxplot(outlier.shape = NA, aes(fill = region)) +
coord_cartesian(ylim = c(0, 6500)) +
ggtitle("# genes detected across samples") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
all_metadata %>% ggplot(aes(x = sample, y = nCount_RNA)) +
# geom_violin(scale = "width", aes(fill = region)) +
geom_boxplot(outlier.shape = NA, aes(fill = region)) +
coord_cartesian(ylim = c(0, 35000)) +
ggtitle("# UMIs across samples") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
all_metadata %>% ggplot(aes(x = sample, y = percent.mt)) +
geom_boxplot(outlier.shape = NA, aes(fill = region)) +
ggtitle("% mitochondrial reads") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
all_metadata %>% ggplot(aes(x = sample, y = TSS.enrichment)) +
# geom_violin(scale = "width", aes(fill = region)) +
geom_boxplot(outlier.shape = NA, aes(fill = region)) +
coord_cartesian(ylim = c(1, 8)) +
ggtitle("TSS enrichment") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 17),
axis.text.x = element_text(size = 14, angle = 90, vjust = 0.5, hjust=1),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
all_metadata %>%
ggplot(aes(x = sample, y = nCount_ATAC)) +
# geom_violin(scale = "width", aes(fill = region)) +
geom_boxplot(outlier.shape = NA, aes(fill = region)) +
coord_cartesian(ylim = c(0, 20000)) +
ggtitle("# cut sites") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 15, angle = 90, vjust = 0.5, hjust=1),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
all_metadata %>% ggplot(aes(x = predicted.cell_type_percluster)) +
geom_bar(aes(fill = batch), position = "fill") +
ggtitle("batch") + NoLegend() + ylab("proportion") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 14, angle = 90, vjust = 0.5, hjust=1),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
all_metadata %>% ggplot(aes(x = predicted.cell_type_percluster)) +
geom_bar(aes(fill = sample), position = "fill") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_text(size = 15)) +
ggtitle("sample") + NoLegend() + ylab("proportion") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
summary(all_metadata$nCount_RNA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 501 1592 3401 8473 8654 185099
summary(all_metadata$nFeature_RNA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 251 953 1653 2325 3060 11890
dim(all_metadata)
## [1] 417487 247
head(all_metadata)
# get final cell types
# added by James
all_metadata2 <- all_metadata %>%
select(barcode, sample, cell_type_broad, condition, region) %>%
mutate(cell_type_broad = as.factor(cell_type_broad))
head(all_metadata2)
all_metadata2 %>% ggplot(aes(x = sample)) +
geom_bar(aes(fill = cell_type_broad)) +
facet_wrap(~ region, nrow = 1, scales = "free_x") +
scale_fill_manual(values = palette_labels_harmonized) +
rotate_x() +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
all_metadata2 %>% ggplot(aes(x = sample)) +
geom_bar(aes(fill = cell_type_broad), position = "fill") +
facet_wrap(~ region, nrow = 1, scales = "free_x") +
scale_fill_manual(values = palette_labels_harmonized) +
rotate_x() +
ylab("proportion") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p1 <- all_metadata2 %>% ggplot(aes(x = cell_type_broad)) +
geom_bar(aes(fill = cell_type_broad)) +
facet_wrap(~ region, nrow = 1, scales = "free_x") +
scale_fill_manual(values = palette_labels_harmonized) +
rotate_x() +
theme(axis.text.x = element_blank(), axis.title.x = element_blank()) +
ylab("# cells") +
hide_legend() +
theme(
plot.title = element_text(size = 20),
# axis.title.x = element_text(size = 16),
# axis.text.x = element_text(size = 14),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
p2 <- all_metadata2 %>% ggplot(aes(x = cell_type_broad)) +
geom_bar(aes(fill = condition), position = "fill") +
facet_wrap(~ region, nrow = 1, scales = "free_x") +
rotate_x() +
theme(strip.text.x = element_blank()) +
ylab("% cells") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
cowplot::plot_grid(p1, p2, ncol = 1, rel_heights = c(0.3, 0.7), align = "v", axis = "rl")
Added by James:
old_merged_dir <- "/oak/stanford/groups/akundaje/projects/psychencode/outs-archive/20231009_multiome_analysis/03-integration/merged_objects"
dacc_metadata_old <- readRDS(file.path(old_merged_dir, "dACC/dACC.integrated.batch.analyzed.cell_metadata.labeled.rds"))
dlpfc_metadata_old <- readRDS(file.path(old_merged_dir, "DLPFC/DLPFC.integrated.batch.analyzed.cell_metadata.labeled.rds"))
hippocampus_metadata_old <- readRDS(file.path(old_merged_dir, "Hippocampus/Hippocampus.integrated.batch.analyzed.cell_metadata.labeled.rds"))
cell_type_count <- function(metadata) {
return(metadata %>%
group_by(cell_type_broad) %>%
summarise(n = n()) %>%
arrange(cell_type_broad))
}
dacc_old_cell_type_count <- dacc_metadata_old %>% cell_type_count()
dacc_new_cell_type_count <- dacc %>% cell_type_count()
dacc_cell_type_count_long <- full_join(dacc_old_cell_type_count,
dacc_new_cell_type_count,
by = "cell_type_broad") %>%
rename(archived = n.x, new = n.y) %>%
pivot_longer(cols = c(archived, new), names_to = "time_point", values_to = "count") %>%
replace_na(list(count = 0))
ggplot(dacc_cell_type_count_long, aes(x = cell_type_broad,
y = count, fill = time_point)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "dACC Cell Type Count Comparison: Archived and New", x = "Cell Type",
y = "Count") + theme(legend.title = element_blank()) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 13, angle = 90, vjust = 0.5, hjust=1),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
dlpfc_old_cell_type_count <- dlpfc_metadata_old %>% cell_type_count()
dlpfc_new_cell_type_count <- dlpfc %>% cell_type_count()
dlpfc_cell_type_count_long <- full_join(dlpfc_old_cell_type_count,
dlpfc_new_cell_type_count,
by = "cell_type_broad") %>%
rename(archived = n.x, new = n.y) %>%
pivot_longer(cols = c(archived, new), names_to = "time_point", values_to = "count") %>%
replace_na(list(count = 0))
ggplot(dlpfc_cell_type_count_long, aes(x = cell_type_broad,
y = count, fill = time_point)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "DLPFC Cell Type Count Comparison: Archived and New", x = "Cell Type",
y = "Count") + theme(legend.title = element_blank()) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 13 , angle = 90, vjust = 0.5, hjust=1),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
hippocampus_old_cell_type_count <- hippocampus_metadata_old %>% cell_type_count()
hippocampus_new_cell_type_count <- hippocampus %>% cell_type_count()
hippocampus_cell_type_count_long <- full_join(hippocampus_old_cell_type_count,
hippocampus_new_cell_type_count,
by = "cell_type_broad") %>%
rename(archived = n.x, new = n.y) %>%
pivot_longer(cols = c(archived, new), names_to = "time_point", values_to = "count") %>%
replace_na(list(count = 0))
ggplot(hippocampus_cell_type_count_long, aes(x = cell_type_broad,
y = count, fill = time_point)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Hippocampus Cell Type Count Comparison: Archived and New",
x = "Cell Type", y = "Count") + theme(legend.title = element_blank()) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 16),
axis.text.x = element_text(size = 13, angle = 90, vjust = 0.5, hjust=1),
axis.title.y = element_text(size = 16),
axis.text.y = element_text(size = 14),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
# collect cell_type_broad from all regions
cell_type_broad_all_regions <- rbind(data.frame(cell_type_broad = dacc$cell_type_broad,
region = 'dACC'),
data.frame(cell_type_broad = dlpfc$cell_type_broad,
region = 'DLPFC'),
data.frame(cell_type_broad =
hippocampus$cell_type_broad,
region = 'Hippocampus')) |>
setNames(c("Label_harmonized", "region"))
# remove duplicate labels
labels_distinct <- distinct(labels, Label_harmonized, .keep_all = TRUE)
# map by Label_harmonized
cell_type_broad_all_regions <- full_join(cell_type_broad_all_regions,
labels_distinct,
by = "Label_harmonized") |>
select(Label_harmonized, Class, region) |>
mutate(Class = if_else(Class %in% c("Glutamatergic", "GABAergic"),
"Neuronal", Class))
class_per_region <- cell_type_broad_all_regions %>%
group_by(region, Class) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(region) %>%
mutate(total_count = sum(count)) %>%
mutate(percentage = count / total_count * 100) %>%
ungroup() %>%
select(-total_count)
region_per_class <- cell_type_broad_all_regions %>%
group_by(Class, region) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(Class) %>%
mutate(total_count = sum(count)) %>%
mutate(percentage = count / total_count * 100) %>%
ungroup() %>%
select(-total_count)
p3 <- ggplot(cell_type_broad_all_regions, aes(x = region, fill = Class)) +
geom_bar() +
labs(title = "Stacked Barplot of Cell Class per Region",
x = "Region",
y = "Count") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p4 <- ggplot(class_per_region, aes(x = region, y = percentage, fill = Class)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "Stacked Percentage Barplot of Cell Class per Region",
x = "Region",
y = "Percentage") +
scale_y_continuous(labels = scales::percent) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p3 | p4
p5 <- ggplot(cell_type_broad_all_regions, aes(x = Class, fill = region)) +
geom_bar() +
labs(title = "Stacked Barplot of Region per Cell Class",
x = "Cell Class",
y = "Count") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p6 <- ggplot(region_per_class, aes(x = Class, y = percentage, fill = region)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "Stacked Percentage Barplot of Region per Cell Class",
x = "Cell Class",
y = "Percentage") +
scale_y_continuous(labels = scales::percent) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p5 | p6
cell_type_broad_all_regions_old <- rbind(
data.frame(cell_type_broad = dacc_metadata_old$cell_type_broad, region = 'dACC'),
data.frame(cell_type_broad = dlpfc_metadata_old$cell_type_broad, region = 'DLPFC'),
data.frame(cell_type_broad = hippocampus_metadata_old$cell_type_broad, region = 'Hippocampus')) |>
setNames(c("Label_harmonized", "region"))
# map by Label_harmonized
cell_type_broad_all_regions_old <- full_join(cell_type_broad_all_regions_old,
labels_distinct,
by = "Label_harmonized") |>
select(Label_harmonized, Class, region) |>
mutate(Class = if_else(Class %in% c("Glutamatergic", "GABAergic"),
"Neuronal", Class))
class_per_region_old <- cell_type_broad_all_regions_old %>%
group_by(region, Class) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(region) %>%
mutate(total_count = sum(count)) %>%
mutate(percentage = count / total_count * 100) %>%
ungroup() %>%
select(-total_count)
region_per_class_old <- cell_type_broad_all_regions_old %>%
group_by(Class, region) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(Class) %>%
mutate(total_count = sum(count)) %>%
mutate(percentage = count / total_count * 100) %>%
ungroup() %>%
select(-total_count)
p7 <- ggplot(cell_type_broad_all_regions, aes(x = region, fill = Class)) +
geom_bar() +
labs(title = "New Stacked Barplot of Cell Class per Region", x = "Region", y = "Count") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p8 <- ggplot(cell_type_broad_all_regions_old, aes(x = region, fill = Class)) +
geom_bar() +
labs(title = "Archived Stacked Barplot of Cell Class per Region",
x = "Region",
y = "Count") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p7 | p8
The distribution of cell class count per region look similar.
p9 <- ggplot(class_per_region, aes(x = region, y = percentage, fill = Class)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "New Stacked Percentage Barplot of Cell Class per Region",
x = "Region",
y = "Percentage") +
scale_y_continuous(labels = scales::percent) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p10 <- ggplot(class_per_region_old, aes(x = region, y = percentage, fill = Class)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "Archived Stacked Percentage Barplot of Cell Class per Region",
x = "Region",
y = "Percentage") +
scale_y_continuous(labels = scales::percent) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p9 | p10
There are more unidentified cells that do not fall into either the neuronal or non-neuronal categories in the new datasets.
p11 <- ggplot(cell_type_broad_all_regions, aes(x = Class, fill = region)) +
geom_bar() +
labs(title = "New Stacked Barplot of Region per Cell Class",
x = "Cell Class",
y = "Count") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p12 <- ggplot(cell_type_broad_all_regions_old, aes(x = Class, fill = region)) +
geom_bar() +
labs(title = "Archived Stacked Barplot of Region per Cell Class",
x = "Cell Class",
y = "Count") +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p11 | p12
There are more unidentified cells in the new datasets. The other two cell classes had a similar distributions in the new and archived datasets.
p13 <- ggplot(region_per_class, aes(x = Class, y = percentage, fill = region)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "New Stacked Percentage Barplot of Region per Cell Class",
x = "Cell Class",
y = "Percentage") +
scale_y_continuous(labels = scales::percent) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p14 <- ggplot(region_per_class_old, aes(x = Class, y = percentage, fill = region)) +
geom_bar(stat = "identity", position = "fill") +
labs(title = "Archived Stacked Percentage Barplot of Region per Cell Class",
x = "Cell Class",
y = "Percentage") +
scale_y_continuous(labels = scales::percent) +
theme(
plot.title = element_text(size = 20),
axis.title.x = element_text(size = 18),
axis.text.x = element_text(size = 16),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16)
)
p13 | p14
The unidentified cells from the DLPFC region make up a larger percentage, compared to the archived datasets.
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/libf77blas.so.3.10.3
## LAPACK: /users/guozy/miniconda3/envs/psych/lib/libopenblasp-r0.3.27.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 viridis_0.6.5 viridisLite_0.4.2 SeuratObject_5.0.0
## [5] Seurat_4.3.0 Signac_1.13.0 ggplot2_3.5.1 glue_1.7.0
## [9] readr_2.1.5 purrr_1.0.2 dplyr_1.1.4 tidyr_1.3.0
##
## loaded via a namespace (and not attached):
## [1] spatstat.univar_3.0-0 spam_2.10-0 fastmatch_1.1-4
## [4] plyr_1.8.9 igraph_2.0.3 lazyeval_0.2.2
## [7] sp_2.1-4 splines_4.2.0 BiocParallel_1.32.6
## [10] listenv_0.9.1 scattermore_1.2 GenomeInfoDb_1.34.9
## [13] digest_0.6.36 htmltools_0.5.8.1 fansi_1.0.6
## [16] magrittr_2.0.3 tensor_1.5 cluster_2.1.6
## [19] ROCR_1.0-11 tzdb_0.4.0 globals_0.16.3
## [22] Biostrings_2.66.0 matrixStats_1.3.0 vroom_1.6.5
## [25] spatstat.sparse_3.1-0 colorspace_2.1-1 ggrepel_0.9.5
## [28] xfun_0.46 crayon_1.5.3 RCurl_1.98-1.16
## [31] jsonlite_1.8.8 progressr_0.14.0 spatstat.data_3.1-2
## [34] survival_3.7-0 zoo_1.8-12 polyclip_1.10-6
## [37] gtable_0.3.5 zlibbioc_1.44.0 XVector_0.38.0
## [40] leiden_0.4.3.1 future.apply_1.11.2 BiocGenerics_0.44.0
## [43] abind_1.4-8 scales_1.3.0 spatstat.random_3.3-1
## [46] miniUI_0.1.1.1 Rcpp_1.0.11 xtable_1.8-4
## [49] reticulate_1.38.0 bit_4.0.5 dotCall64_1.1-1
## [52] stats4_4.2.0 htmlwidgets_1.6.4 httr_1.4.7
## [55] ica_1.0-3 farver_2.1.2 pkgconfig_2.0.3
## [58] sass_0.4.9 uwot_0.2.2 deldir_2.0-4
## [61] utf8_1.2.4 labeling_0.4.3 tidyselect_1.2.1
## [64] rlang_1.1.4 reshape2_1.4.4 later_1.3.2
## [67] munsell_0.5.1 tools_4.2.0 cachem_1.1.0
## [70] cli_3.6.3 generics_0.1.3 ggridges_0.5.6
## [73] evaluate_0.24.0 stringr_1.5.1 fastmap_1.2.0
## [76] yaml_2.3.9 goftest_1.2-3 bit64_4.0.5
## [79] knitr_1.48 fitdistrplus_1.2-1 RANN_2.6.1
## [82] pbapply_1.7-2 future_1.33.2 nlme_3.1-165
## [85] mime_0.12 RcppRoll_0.3.1 compiler_4.2.0
## [88] plotly_4.10.4 png_0.1-8 spatstat.utils_3.0-5
## [91] tibble_3.2.1 bslib_0.7.0 stringi_1.8.4
## [94] highr_0.11 lattice_0.22-6 Matrix_1.6-1.1
## [97] vctrs_0.6.5 pillar_1.9.0 lifecycle_1.0.4
## [100] spatstat.geom_3.3-2 lmtest_0.9-40 jquerylib_0.1.4
## [103] RcppAnnoy_0.0.22 data.table_1.15.4 cowplot_1.1.3
## [106] bitops_1.0-7 irlba_2.3.5.1 httpuv_1.6.15
## [109] patchwork_1.2.0 GenomicRanges_1.50.2 R6_2.5.1
## [112] promises_1.3.0 KernSmooth_2.23-24 gridExtra_2.3
## [115] IRanges_2.32.0 parallelly_1.37.1 codetools_0.2-20
## [118] MASS_7.3-56 withr_3.0.0 sctransform_0.4.0
## [121] Rsamtools_2.14.0 S4Vectors_0.36.2 GenomeInfoDbData_1.2.9
## [124] parallel_4.2.0 hms_1.1.3 grid_4.2.0
## [127] rmarkdown_2.27 Rtsne_0.17 spatstat.explore_3.3-1
## [130] shiny_1.8.1.1