```{r setup, include=FALSE}
# Set the document-wide default for the `echo` chunk option to TRUE.
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
library(tidyverse)
# Nine-colour palette passed to the manual fill/colour scales of the plots.
primary <- c(
  "#D1495B",
  "#EDAE49",
  "#00798C",
  "#003D5B",
  "#30638E",
  "#56B4E9",
  "#7F67B6",
  "#A4A8D1",
  "#380606"
)
```


# Cleaning
```{r}
# Build `survey_clean` from the raw `survey` data frame.
# NOTE(review): `survey` is not created in the visible part of this file, so
# it must already exist in the session before this chunk runs.
survey_clean <- survey |>
  # Keep only rows where Q1 was answered.
  filter(!is.na(Q1)) |>
  # Columns are picked by position, so this depends on the column order of
  # `survey`.
  select(9, 18:36) |>
  mutate(
    # Strip "~" and "+" from Q1, then convert it to a number.
    Q1 = as.numeric(str_remove_all(str_remove_all(Q1, "~"), "\\+")),
    # Shorten this label; presumably so that its internal commas are not
    # treated as delimiters by the split below.
    Q2 = str_replace_all(Q2, "Science, Logic, and Mathematics", "SLM")
  ) |>
  # Split Q2 on commas into Q2_sep1, Q2_sep2, ...; rows with fewer pieces are
  # padded on the right.
  separate_wider_delim(
    cols = Q2,
    delim = ",",
    names_sep = "_sep",
    too_few = "align_start"
  ) |>
  # Drop the 6th and 7th columns of the widened table (again by position).
  select(-6, -7)
```

# Intro numbers
```{r}
# Print per-column summary statistics of the cleaned data.
summary(survey_clean)

# Open the cleaned data, sorted by ascending Q1, in the data viewer.
view(arrange(survey_clean, Q1))
```


# Overviews
```{r}
# Overview: counts
# Table of the number of responses per Q3 value. Rows containing NA are
# dropped, and the rows are ordered Certificate < Bachelors < Masters <
# Doctorate through the factor levels.
survey_clean |>
  count(Q3, name = "Count") |>
  na.omit() |>
  mutate(
    Q3 = factor(
      Q3,
      levels = c("Certificate", "Bachelors", "Masters", "Doctorate")
    )
  ) |>
  rename(`Degree type` = Q3) |>
  arrange(`Degree type`) |>
  gt::gt(caption = "Responses by program terminal degree")

# Inspect how many responses fall under each value of Q2_sep1.
view(count(survey_clean, Q2_sep1))


```

# Details: plot titles indicate what is being tracked
```{r}
# Bar chart of Q1 per response, one facet per Q3 value (excluding
# "Certificate"), with a smoothed trend line. Within each facet the bars are
# ordered by ascending Q1 via the per-group row number `rn`.
vol_status_23 <- survey_clean |>
  select(Q1, Q3) |>
  filter(Q3 != "Certificate") |>
  na.omit() |>
  arrange(Q3, Q1) |>
  # Position of each response within its Q3 group, used as the x coordinate.
  mutate(rn = row_number(), .by = Q3) |>
  ggplot(aes(rn, Q1)) +
  geom_bar(stat = "identity") +
  geom_smooth(linewidth = 2, se = FALSE) +
  facet_wrap(~Q3, scales = "free_x") +
  theme_minimal(base_size = 60) +
  # The x values are only row positions, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Volume of applications by program status",
    x = "",
    y = "counts"
  )

# Write the chart to disk as a 40 x 30 inch PNG at 300 dpi.
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\vol_status_23_b.png",
  plot = vol_status_23,
  device = "png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

# Bar chart of Q1 per response for rows whose Q3.1 matches "R" followed by a
# digit, with bars ordered by ascending Q1 and filled by the Q3.1 value.
vol_status_phd_23 <- survey_clean |>
  filter(str_detect(Q3.1, "R[0-9]")) |>
  select(counts = Q1, status = Q3.1) |>
  arrange(counts) |>
  # Row position after sorting, used as the x coordinate.
  mutate(rn = row_number()) |>
  ggplot(aes(rn, counts, fill = status)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = primary) +
  theme_minimal(base_size = 60) +
  # The x values are only row positions, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Volume of applications to PhD-granting programs",
    x = "",
    y = "counts"
  )

# Write the chart to disk as a 40 x 30 inch PNG at 300 dpi.
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\vol_status_phd_23_a.png",
  plot = vol_status_phd_23,
  device = "png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

# Bar chart of Q1 per response for rows where Q4 is
# "Assistant professor/tenure-track", with bars ordered by ascending Q1 and
# filled by the Q3 value.
vol_TT_23 <- survey_clean |>
  filter(Q4 == "Assistant professor/tenure-track") |>
  select(counts = Q1, status = Q3) |>
  na.omit() |>
  arrange(counts) |>
  # Row position after sorting, used as the x coordinate.
  mutate(rn = row_number()) |>
  ggplot(aes(rn, counts, fill = status)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = primary) +
  theme_minimal(base_size = 60) +
  # The x values are only row positions, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Volume of applications for TT jobs",
    x = "",
    y = "counts"
  )

# Write the chart to disk as a 40 x 30 inch PNG at 300 dpi.
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\vol_TT_23.png",
  plot = vol_TT_23,
  device = "png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

# Point-and-range chart: for every (Q2_sep1, Q3) pair, plot the mean of Q1 as
# a point with a bar spanning min to max, one facet per Q2_sep1 value. The
# horizontal rule marks `aos_avg`, the mean of all the per-pair averages.
# NOTE(review): the label "AOS" is attached to the x aesthetic, which is Q3,
# while the facets are Q2_sep1 -- confirm the label is on the intended axis.
aos_max_min_avg <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  na.omit() |>
  arrange(desc(Q1)) |>
  group_by(Q2_sep1, Q3) |>
  summarise(
    avg = mean(Q1),
    highest = max(Q1),
    lowest = min(Q1)
  ) |>
  ungroup() |>
  mutate(
    Q2_sep1 = as_factor(Q2_sep1),
    # Computed after ungrouping, so this is one value shared by all rows.
    aos_avg = mean(avg)
  ) |>
  ggplot(aes(Q3, avg, color = Q2_sep1)) +
  geom_point(alpha = .8, size = 8) +
  geom_errorbar(
    aes(ymin = lowest, ymax = highest),
    alpha = .7,
    linewidth = 5
  ) +
  geom_hline(
    aes(yintercept = aos_avg),
    linetype = "solid",
    alpha = .3,
    linewidth = 5
  ) +
  coord_flip() +
  facet_wrap(~ Q2_sep1) +
  theme_minimal(base_size = 60) +
  labs(
    x = "AOS",
    y = "counts",
    title = "Volume of applications by AOS and program status"
  ) +
  # Colour duplicates the facet variable, so the legend is hidden.
  theme(legend.position = "none") +
  scale_color_manual(values = primary)

# Write the chart to disk as a 40 x 30 inch PNG at 300 dpi.
ggsave(
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\aos_max_min_avg_b.png",
  plot = aos_max_min_avg,
  device = "png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)

# Bar chart of Q1 per response, with bars ordered by ascending Q1 and filled
# by the (shortened) Q7 answer about how applicant volume compared with
# expectations.
#
# Fix: a stray `view()` at the end of the recoding step used to terminate the
# pipeline, so `ggplot()` never received the data frame and `num_imp_23`
# could not be built or saved. The pipeline now flows straight into the plot.
#
# NOTE(review): the title below matches the TT-jobs chart, but this data is
# not filtered on Q4 -- confirm whether the title should be changed.
num_imp_23 <- survey_clean |>
  filter(!is.na(Q7)) |>
  arrange(Q1) |>
  # Row position after sorting, used as the x coordinate.
  mutate(rn = row_number()) |>
  rename(counts = Q1) |>
  # Shorten the full-sentence answers so they fit in the legend.
  mutate(
    Q7 = str_replace_all(Q7, "There were about as many applicants as I was expecting", "Expected"),
    Q7 = str_replace_all(Q7, "There were fewer applicants than I was expecting", "Fewer"),
    Q7 = str_replace_all(Q7, "There were more applicants than I was expecting", "More")
  ) |>
  ggplot(aes(rn, counts, fill = Q7)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = primary) +
  theme_minimal(base_size = 60) +
  ggtitle("Volume of applications for TT jobs") +
  # The x values are only row positions, so their tick labels are hidden.
  theme(axis.text.x = element_blank()) +
  xlab("") +
  ylab("counts")

# Write the chart to disk as a 40 x 30 inch PNG at 300 dpi.
ggsave(
  plot = num_imp_23,
  device = "png",
  filename = "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\num_imp_23.png",
  width = 40,
  height = 30,
  units = "in",
  dpi = 300,
  limitsize = FALSE,
  bg = "white"
)
```

# Tables
```{r}
# Table of the standard deviation and mean of Q1 for each Q3 value, rounded
# to whole numbers. Rows with NA in Q1, Q2_sep1 or Q3 are excluded.
overview_p23 <- survey_clean |>
  select(Q1, Q2_sep1, Q3) |>
  na.omit() |>
  group_by(Q3) |>
  # reframe() returns an ungrouped result, so no ungroup() is needed after it.
  reframe(`standard deviation` = sd(Q1), average = mean(Q1)) |>
  rename(status = Q3) |>
  mutate(across(where(is.double), round)) |>
  gt::gt(caption = "Averages and dispersion")

# Save the table as a PNG image.
gt::gtsave(
  overview_p23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\overview_p23.png",
  expand = 10
)

# Table of the minimum, mean, median and maximum of Q1 for each Q3 value,
# rounded to whole numbers. Rows with NA in Q1 or Q3 are excluded.
#
# Fix: the table caption read "applicantions"; it now reads "applications".
all_over_23 <- survey_clean |>
  select(Q1, Q3) |>
  na.omit() |>
  arrange(-Q1) |>
  group_by(Q3) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1)
  ) |>
  ungroup() |>
  mutate(across(where(is.double), round)) |>
  rename(
    status = Q3,
    mean = avg,
    median = mid
  ) |>
  gt::gt(caption = "Overview of applications by program status")

# Save the table as a PNG image.
gt::gtsave(
  all_over_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\all_over_23.png",
  expand = 10
)

# Table of the minimum, mean, median and maximum of Q1 per Q2_sep1 value,
# restricted to rows where Q4 is "Assistant professor/tenure-track" and
# rounded to whole numbers.
tt_23 <- survey_clean |>
  filter(Q4 == "Assistant professor/tenure-track") |>
  group_by(Q2_sep1) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1),
    .groups = "drop"
  ) |>
  rename(AOS = Q2_sep1, mean = avg, median = mid) |>
  mutate(across(where(is.double), round)) |>
  gt::gt(caption = "TT position applications")

# Save the table as a PNG image.
gt::gtsave(
  tt_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\tt_23.png",
  expand = 10
)

# Table of the minimum, mean, median and maximum of Q1 per Q2_sep1 value,
# restricted to rows where Q4 is "Postdoc" or contains "Fixed", and rounded
# to whole numbers.
pd_fixed_23 <- survey_clean |>
  filter(Q4 == "Postdoc" | str_detect(Q4, "Fixed")) |>
  group_by(Q2_sep1) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1),
    .groups = "drop"
  ) |>
  rename(AOS = Q2_sep1, mean = avg, median = mid) |>
  mutate(across(where(is.double), round)) |>
  gt::gt(caption = "Postdoc and fixed-term position applications")

# Save the table as a PNG image.
gt::gtsave(
  pd_fixed_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\pd_fixed_23.png",
  expand = 10
)

# Table of the minimum, mean, median and maximum of Q1 per Q2_sep1 value for
# the remaining Q4 values: everything that is not "Postdoc", not
# "Assistant professor/tenure-track" and does not contain "Fixed". Values are
# rounded to whole numbers.
open_23 <- survey_clean |>
  filter(
    Q4 != "Postdoc",
    Q4 != "Assistant professor/tenure-track",
    !str_detect(Q4, "Fixed")
  ) |>
  group_by(Q2_sep1) |>
  summarize(
    least = min(Q1),
    avg = mean(Q1),
    mid = median(Q1),
    most = max(Q1),
    .groups = "drop"
  ) |>
  rename(AOS = Q2_sep1, mean = avg, median = mid) |>
  mutate(across(where(is.double), round)) |>
  gt::gt(caption = "Open and tenured position applications")

# Save the table as a PNG image.
gt::gtsave(
  open_23,
  "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\open_23.png",
  expand = 10
)

  
  # Table of how often each Q7 answer was given, most frequent first.
  imp_app_23 <- survey_clean |>
    filter(!is.na(Q7)) |>
    count(Q7, name = "count") |>
    rename(response = Q7) |>
    arrange(desc(count)) |>
    gt::gt(caption = "Impressions of applicant volume")

  # Save the table as a PNG image.
  gt::gtsave(
    imp_app_23,
    "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\imp_app_23.png",
    expand = 10
  )
  
  # Table of how often each (Q6, Q3) combination occurs, most frequent first.
  #
  # Fix: two stray `view()` calls used to cut the pipeline into pieces, so
  # `rename()` and `gt::gt()` were called without any data and `time_job_23`
  # was never created for `gtsave()`. The pipeline now runs end to end.
  time_job_23 <- survey_clean |>
    filter(!is.na(Q6)) |>
    count(Q6, Q3) |>
    rename(
      response = Q6,
      count = n
    ) |>
    arrange(-count) |>
    gt::gt(caption = "Time allotment for job")

  # Save the table as a PNG image.
  gt::gtsave(
    time_job_23,
    "C:\\Users\\lassiter\\OneDrive - Gonzaga University\\R data\\PhilJobs_annual_outputs\\time_job_23.png",
    expand = 10
  )


```