library(tidyverse)
library(Seurat)

outs_dir <- "//allen/programs/celltypes/workgroups/rnaseqanalysis/Nik/Analysis_for_human_cross_areal_paper/for_manuscript/astrocyte_subtypes/curated_markers_for_validation/"
dir.create(outs_dir)

seurat_obj <- readRDS("//allen/programs/celltypes/workgroups/rnaseqanalysis/Nik/Analysis_for_human_cross_areal_paper/Dataset_curation_steps/Step10b_combined_regions_and_integrated_per_neighborhood/glia/data/seurat_obj.RDS")

Idents(seurat_obj) <- seurat_obj$within_area_subclass
seurat_obj <- subset(seurat_obj, idents = "Astro")

DefaultAssay(seurat_obj) <- "RNA"
seurat_obj <- NormalizeData(seurat_obj, assay = "RNA")
FeaturePlot(seurat_obj, reduction = "umap", slot = "data", features = c("AQP4", "WIF1", 
                                                                        "GFAP", "ID3",
                                                                        "TNC"), raster = TRUE)

#find marker genes of subtypes
p1 <- FeaturePlot(seurat_obj, reduction = "umap", slot = "data", features = c("TNC"), raster = TRUE)
proto <- CellSelector(p1)
fibrous <- CellSelector(p1)
ilm <- CellSelector(p1)

#proto markers
seurat_obj$comparison <- "group2"
seurat_obj$comparison[which(seurat_obj$sample_id %in% proto)] <- "group1"

Idents(seurat_obj) <- seurat_obj$comparison
proto_markers <- FindAllMarkers(seurat_obj, assay = "RNA", slot = "data", only.pos = TRUE, 
                                test.use = "wilcox", max.cells.per.ident = 100)

proto_markers %>%
  filter(avg_log2FC > 1)

#ilm markers
seurat_obj$comparison <- "group2"
seurat_obj$comparison[which(seurat_obj$sample_id %in% ilm)] <- "group1"

Idents(seurat_obj) <- seurat_obj$comparison
ilm_markers <- FindAllMarkers(seurat_obj, assay = "RNA", slot = "data", only.pos = TRUE, 
                                test.use = "wilcox", max.cells.per.ident = 100)

#fibrous markers
seurat_obj$comparison <- "group2"
seurat_obj$comparison[which(seurat_obj$sample_id %in% fibrous)] <- "group1"

Idents(seurat_obj) <- seurat_obj$comparison
fibrous_markers <- FindAllMarkers(seurat_obj, assay = "RNA", slot = "data", only.pos = TRUE, 
                                test.use = "wilcox", max.cells.per.ident = 100)

#curate marker genes
genes_use <- ilm_markers %>%
  filter(avg_log2FC > 1,
         cluster == "group1")

genes_use <- fibrous_markers %>%
  filter(avg_log2FC > 1,
         cluster == "group1")

genes_use <- proto_markers %>%
  filter(avg_log2FC > 1,
         cluster == "group1")

FeaturePlot(seurat_obj, reduction = "umap", slot = "data", features = genes_use$gene[1:10], raster = TRUE)

#plot of curated marker genes
genes_use <- c("AQP4", "WIF1", "DGKZ", "GRM3",
  
  "DPP10", "DCLK1", "VCAN",
  "GFAP", "ID3",
  
  "TNC", "ADAMTSL3", "AQP1", "SYNPO2",
  "NFASC", "L3MBTL4", "CD44", "SLC24A4",
  "COL21A1", "KCNJ3", "PLCB4",
  
  "GRIA1", "KCNN2", "DPP6", "LINC01411",
  "LOC105370504", "SH3GL2", "ELN",
  "FABP7", "HS3ST3A1", "LMO2", "SHISA6", "VWCE")

p1 <- FeaturePlot(seurat_obj, reduction = "umap", slot = "data", features = genes_use[1:9], raster = TRUE)
p2 <- FeaturePlot(seurat_obj, reduction = "umap", slot = "data", features = genes_use[10:20], raster = TRUE)
p3 <- FeaturePlot(seurat_obj, reduction = "umap", slot = "data", features = genes_use[21:32], raster = TRUE)

pdf(file = str_c(outs_dir, "feature_pan_and_proto.pdf"),
    useDingbats = FALSE,
    height = 10,
    width = 10)
print(p1)
dev.off()

pdf(file = str_c(outs_dir, "feature_fibrous.pdf"),
    useDingbats = FALSE,
    height = 10,
    width = 10)
print(p2)
dev.off()

pdf(file = str_c(outs_dir, "feature_ilm.pdf"),
    useDingbats = FALSE,
    height = 12,
    width = 12)
print(p3)
dev.off()


#gap junction genes
FeaturePlot(seurat_obj, reduction = "umap", slot = "data", raster = TRUE,
            features = c("GJA1", "GJA3", "GJA4", "GJA5", "GJA6P", "GJA8",
                         "GJA9", "GJA10", "GJB1", "GJB2", "GJB3", "GJB4",
                         "GJB5", "GJB6", "GJB7", "GJC1", "GJC2", "GJC3", 
                         "GJD2", "GJD3", "GJD4", "GJE1")
              )

p1 <- FeaturePlot(seurat_obj, reduction = "umap", slot = "data", raster = TRUE,
            features = c("GJA1", "GJB6")
              )
pdf(file = str_c(outs_dir, "feature_expressed_GJgenes.pdf"),
    useDingbats = FALSE,
    height = 6,
    width = 12)
print(p1)
dev.off()


