library(Seurat)
library(tidyverse)
library(scattermore)

data_dir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/Nik/Analysis_for_human_cross_areal_paper/for_manuscript/"
outs_dir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/Nik/Analysis_for_human_cross_areal_paper/for_manuscript/region_umaps_Cv3/"


meta <- readRDS(str_c(data_dir, "metadata/meta_10x_2_11_22.RDS"))
mat_loc <- read.csv(str_c(data_dir, "metadata/matrix_locations.csv"))

regions <- mat_loc %>% distinct(region) %>% pull()
region_oi <- regions[8]

mat_to_load <- mat_loc %>%
    as_tibble() %>%
    filter(region == region_oi)

meta_sub <- meta %>%
    filter(region == region_oi) %>%
    as.data.frame()
rownames(meta_sub) <- meta_sub$sample_id

mat_1 <- readRDS(mat_to_load$matrix_location[1])    
mat_1 <- mat_1[, which(colnames(mat_1) %in% meta_sub$sample_id)]
mat_2 <- readRDS(mat_to_load$matrix_location[2])    
mat_2 <- mat_2[, which(colnames(mat_2) %in% meta_sub$sample_id)]
mat_3 <- readRDS(mat_to_load$matrix_location[3])    
mat_3 <- mat_3[, which(colnames(mat_3) %in% meta_sub$sample_id)]
mat_4 <- readRDS(mat_to_load$matrix_location[4])    
mat_4 <- mat_4[, which(colnames(mat_4) %in% meta_sub$sample_id)]
mat_5 <- readRDS(mat_to_load$matrix_location[5])    
mat_5 <- mat_5[, which(colnames(mat_5) %in% meta_sub$sample_id)]

mat <- cbind(mat_1, mat_2, mat_3, mat_4, mat_5)
dim(mat)
rm(mat_1, mat_2, mat_3, mat_4, mat_5)
gc()


seurat_obj <- CreateSeuratObject(counts = mat, meta.data = meta_sub)
seurat_obj <- SplitObject(seurat_obj, split.by = "donor")

seurat_obj <- lapply(X = seurat_obj, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 3000)
})

features <- SelectIntegrationFeatures(object.list = seurat_obj)

seurat_obj <- lapply(X = seurat_obj, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})


anchors <- FindIntegrationAnchors(object.list = seurat_obj, anchor.features = features, reduction = "rpca")
seurat_obj <- IntegrateData(anchorset = anchors)

DefaultAssay(seurat_obj) <- "integrated"

seurat_obj <- ScaleData(seurat_obj, verbose = FALSE) %>%
    RunPCA(npcs = 30, verbose = FALSE) %>%
    RunUMAP(reduction = "pca", dims = 1:30)


to_plot <- seurat_obj@reductions$umap@cell.embeddings %>%
    as_tibble(rownames = "sample_id") %>%
    left_join(seurat_obj@meta.data %>%
                  as_tibble(),
              by = "sample_id")

colors_use <- to_plot %>% select(within_area_cluster, within_area_cluster_color) %>% deframe()

p1 <- to_plot %>%
    sample_n(nrow(to_plot)) %>%
    ggplot() +
    geom_scattermore(aes(x = UMAP_1, y = UMAP_2, color = within_area_cluster), pointsize = 1) +
    labs(title = region_oi) +
    scale_color_manual(values = colors_use) +
    theme_void() +
    theme(aspect.ratio = 1,
          legend.position = "none")
p1

pdf(file = str_c(outs_dir, region_oi, "_umap.pdf"),
    useDingbats = FALSE,
    height = 4,
    width = 4)
print(p1)
dev.off()

write.csv(to_plot, str_c(outs_dir, region_oi, "_umap_coords.csv"))

rm(list = ls())
gc()
