library(tidyverse)
library(here)

subclass_order <- c("L23IT", "L4IT", "L5IT", "L6IT", "L6ITCar3",
                    "L5ET", "L56NP", "L6b", "L6CT",
                    "Lamp5Lhx6", "Lamp5", "Sncg", "Vip", "Pax6",
                    "Chandelier", "Pvalb", "Sst", "SstChodl",
                    "OPC", "Oligo", "Astro", "MicroPVM", "Endo", "VLMC")

files_to_load <- list.files(here("markers", "subclass_anova"), full.names = T) %>%
  enframe() %>%
  filter(value %>% str_detect(".RDS")) 

data <- readRDS(files_to_load$value[1]) %>%
  select(genes)

for(i in 1:nrow(files_to_load)){
  data_tmp <- readRDS(files_to_load$value[i])  
  data <- data %>% left_join(data_tmp, by = "genes")
}

write.csv(data, here("tables", "subclass_ANOVA_pvalues.csv"))

#make plot
to_plot <- data %>%
  gather(key = "cluster", 
         value = "sig", -genes) %>%
  mutate(sig = case_when(sig == "NaN" ~ "1",
                         TRUE ~ sig)) %>%
  mutate(sig = as.double(sig) * 50271) %>%
  group_by(cluster) %>%
  mutate(gene_count = n()) %>%
  ungroup() %>%
  
  mutate(sig_thres = case_when(sig < 0.05 ~ 1,
                               TRUE ~0)) %>%
  
  group_by(cluster, gene_count) %>%
  summarise(sig_count = sum(sig_thres)) %>%
  ungroup()

p1 <- to_plot %>%
  mutate(cluster = cluster %>% as_factor() %>% fct_relevel(subclass_order)) %>%
  ggplot() +
  geom_col(aes(x = cluster, y = sig_count)) +
  labs(x = "",
       y = "Number of significant genes\n(ANOVA for donor means of each\ngene across regions)") +
  scale_y_log10() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.2))
pdf(file = here("subclass_anova", "sig_genes_barplot.pdf"),
    useDingbats = F, height = 4, width = 5)
print(p1)
dev.off()
