library(tidyverse)
library(here)
library(Seurat)
library(scrattch.hicat)
library(rstatix)

#load metadta
meta <- readRDS(here("data", "meta_10x_2_11_22.RDS"))
table(meta$within_area_subclass)

meta_sub <- meta %>%
  #filter(within_area_subclass == "L6 IT Car3") %>%
  # filter(within_area_subclass == "L6 IT") %>%
  # filter(within_area_subclass == "L5 IT") %>%
  #filter(within_area_subclass == "L4 IT") %>%
  #filter(within_area_subclass == "L2/3 IT") %>%
  #filter(within_area_subclass == "Chandelier") %>%
  #filter(within_area_subclass == "Pvalb") %>%
  #filter(within_area_subclass == "Sst") %>%
  #filter(within_area_subclass == "Sst Chodl") %>%
  #filter(within_area_subclass == "Lamp5 Lhx6") %>%
  #filter(within_area_subclass == "Lamp5") %>%
  #filter(within_area_subclass == "Sncg") %>%
  #filter(within_area_subclass == "Pax6") %>%
  #filter(within_area_subclass == "Vip") %>%
  #filter(within_area_subclass == "L5 ET") %>%
  #filter(within_area_subclass == "L5/6 NP") %>%
  #filter(within_area_subclass == "L6 CT") %>%
  #filter(within_area_subclass == "L6b") %>%
  #filter(within_area_subclass == "Astro") %>%
  #filter(within_area_subclass == "Endo") %>%
  #filter(within_area_subclass == "Micro/PVM") %>%
  #filter(within_area_subclass == "Oligo") %>%
  #filter(within_area_subclass == "OPC") %>%
  filter(within_area_subclass == "VLMC") %>%
  group_by(region, donor) %>%
  sample_n(min(200, n())) %>%
  ungroup()

#load expression data
files_to_load <- list.files(here("data"), full.names = T) %>%
  enframe() %>%
  #filter(value %>% str_detect("it_type"))
  #filter(value %>% str_detect("mge_inh"))
  #filter(value %>% str_detect("cge_inh"))
  #filter(value %>% str_detect("deep_exc"))
  filter(value %>% str_detect("glia"))

mat <- readRDS(files_to_load$value[1])
mat <- mat[ , which(colnames(mat) %in% meta_sub$sample_id)]

for(i in 2:nrow(files_to_load)){
  print(i)
  tmp_mat <- readRDS(files_to_load$value[i])
  tmp_mat <- tmp_mat[ ,  which(colnames(tmp_mat) %in% meta_sub$sample_id)]
  mat <- cbind(mat, tmp_mat)
  gc()
}
dim(mat)
gc()

#normalize data
seurat_obj <- CreateSeuratObject(mat)
seurat_obj <- NormalizeData(seurat_obj)
mat <- seurat_obj@assays$RNA@data

#make long form mat
cl <- meta_sub %>% 
  mutate(donor_region = str_c(donor, "xxx", region)) %>%
  select(sample_id, donor_region) %>%
  deframe()

cl_means <- get_cl_means(mat, cl)

test <- cl_means %>%
  as_tibble(rownames = "gene") %>%
  gather(key = "donor_region",
         value = "mean_expr",
         -gene) %>%
  separate(donor_region, into = c("donor", "region"), sep = "xxx") %>%
  mutate(donor = donor %>% as.factor(),
         region = region %>% as.factor())

#calc cluster means
results <- tibble(genes = rownames(mat))
results$VLMC <- "test"


for(i in 1:nrow(results)){
  gene_oi <- results$genes[i]
  test_oi <- test %>%
    filter(gene == gene_oi)
  
  model <- aov(data = test_oi, mean_expr ~ factor(region))
  model_res <- summary(model)
  
  results$VLMC[i] <- model_res[[1]]$`Pr(>F)`[1]
}

tail(results)
results %>% 
  filter(VLMC != "NaN") %>%
  mutate(VLMC = as.double(VLMC) * 50281) %>%
  filter(VLMC < 0.05)
gc()

saveRDS(results, here("subclass_anova", "VLMC_results_donor_mean.RDS")) 

