library(tidyverse)
library(here)
library(Seurat)
library(scrattch.hicat)

#load metadta
meta <- readRDS(here("data", "meta_10x_2_11_22.RDS"))
table(meta$within_area_subclass)

meta_sub <- meta %>%
  filter(within_area_subclass == "L5 IT") %>%
  group_by(region) %>%
  sample_n(min(200, n())) %>%
  ungroup()

#load expression data
files_to_load <- list.files(here("data"), full.names = T) %>%
  enframe() %>%
  #filter(value %>% str_detect("glia"))
  #filter(value %>% str_detect("mge_inh"))
  #filter(value %>% str_detect("cge_inh"))
  filter(value %>% str_detect("it_type"))
  #filter(value %>% str_detect("deep_exc"))

mat <- readRDS(files_to_load$value[1])
mat <- mat[ , which(colnames(mat) %in% meta_sub$sample_id)]

for(i in 2:nrow(files_to_load)){
  print(i)
  tmp_mat <- readRDS(files_to_load$value[i])
  tmp_mat <- tmp_mat[ ,  which(colnames(tmp_mat) %in% meta_sub$sample_id)]
  mat <- cbind(mat, tmp_mat)
  gc()
}
dim(mat)
gc()

#calc cluster means
results <- tibble(genes = rownames(mat))
results$L5IT <- "test"

for(i in 1:nrow(results)){
  gene_oi <- results$genes[i]
  test <- mat[gene_oi, ] %>%
    enframe(name = "sample_id", value = "expr") %>%
    left_join(meta_sub, by = "sample_id")
  
  one.way <- aov(expr ~ region, data = test)
  results$L5IT[i] <- anova(one.way)$'Pr(>F)'[1]
}
gc()

saveRDS(results, here("subclass_anova", "L5IT_results.RDS")) 
