---
title: "3. Performance evaluation"
date: last-modified
---
# Setup and load
```{r}
#| warning: false
#| message: false
library(dplyr)
library(forcats)
library(stringr)
requireNamespace("DT")
source(here::here("R", "0_setup.R"))
theme_set(theme_bw(base_size = 12))
```
```{r}
dataset_to_summarize <- c("compas", "bike")
separate_sculpt_bayes <- function(data) {
  data %>% 
    # parse `model` and extract `-sculpt_*`, only keep * part
    mutate(
      sculpt = model %>% str_extract("-sculpt_\\w+$") %>% str_remove("-sculpt_"),
      sculpt = if_else(is.na(sculpt), "orig.", sculpt),
      sculpt = factor(sculpt, levels = c("orig.", "rough", "detailed", "polished")),
      tune_bayes = if_else(str_detect(model, "_bayes"), TRUE, FALSE),
      model_type = model %>% str_remove("-sculpt_\\w+$") %>% str_remove("_bayes$")
    ) %>%
    relocate(model_type, tune_bayes, sculpt, .after = model)
}
```
# Import data
```{r}
fls <- list.files(storage_folder, pattern = "metrics-ncv")
names(fls) <- fls
res_resample <- map_dfr(fls, load_metrics, .id = "id") %>% 
  mutate(
    dataset = id %>% str_extract("^\\w*-") %>% str_remove("-$"),
    model = id %>% str_remove("-metrics-ncv.csv$") %>% str_remove(paste0("^", dataset, "-"))
  ) %>% 
  select(dataset, model, 
         any_of(c("roc_auc", "mn_log_loss", "rsq", "R2", 
                  "DI", "MI", "R2C1", "r2C1", "rsq_trad", "rmse",
                  "R2_ll", "DI_ll", "MI_ll"))) %>% 
  separate_sculpt_bayes() %>% 
  filter(dataset %in% dataset_to_summarize) %>% 
  arrange(dataset, model, sculpt)
fls2 <- list.files(storage_folder, pattern = "metrics-holdout")
names(fls2) <- fls2
res_hd <- map_dfr(fls2, load_metrics, .id = "id") %>% 
  mutate(
    dataset = id %>% str_extract("^\\w*-") %>% str_remove("-$"),
    model = id %>% str_remove("-metrics-holdout.csv$") %>% str_remove(paste0("^", dataset, "-"))
  ) %>% 
  select(dataset, model,
         any_of(c("roc_auc", "mn_log_loss", "rsq", "R2", 
                  "DI", "MI", "R2C1", "r2C1", "rsq_trad", "rmse",
                  "R2_ll", "DI_ll", "MI_ll"))) %>% 
  separate_sculpt_bayes() %>% 
  filter(dataset %in% dataset_to_summarize) %>% 
  arrange(dataset, model, sculpt)
```
# Figure
```{r}
plot_tune_bayes <- TRUE
res_resample_plot <- 
  res_resample %>% 
  arrange(dataset, model_type, tune_bayes, sculpt) %>% 
  pivot_longer(any_of(c("R2C1", "DI", "MI")),
               names_to = "metric", values_to = "value") %>%
  mutate(metric = factor(metric, levels = c("R2C1", "r2C1", "DI", "MI", "rsq_trad", "rmse")),
         metric = fct_recode(metric, R2 = "R2C1", r2 = "r2C1"))
res_hd_plot <- 
  res_hd %>% 
  arrange(dataset, model_type, tune_bayes, sculpt) %>% 
  pivot_longer(any_of(c("R2C1", "DI", "MI")),
               names_to = "metric", values_to = "value") %>%
  mutate(metric = factor(metric, levels = c("R2C1", "r2C1", "DI", "MI", "rsq_trad", "rmse")),
         metric = fct_recode(metric, R2 = "R2C1", r2 = "r2C1"))
res_plot <- 
  bind_rows(res_hd_plot %>% mutate(eval = "holdout"), 
            res_resample_plot %>% mutate(eval = "resample")) %>% 
  filter(tune_bayes == plot_tune_bayes)
res_plot_compas <- res_plot %>% filter(dataset == "compas")
res_plot_bike <- res_plot %>% filter(dataset == "bike")
```
::: {.panel-tabset}
## Compas
```{r}
res_plot_compas %>% 
  filter(model_type == "xgb") %>%
  ggplot(aes(x = value, y = sculpt, colour = eval)) +
  geom_path(aes(group = eval), alpha = 0.5) + 
  geom_point(size = 2) + 
  facet_grid(cols = vars(metric)) +
  scale_y_discrete(limits = rev(levels(res_hd$sculpt))) +
  labs( 
    title = "XGB sculpted model performance",
    colour = "Evaluation"
  ) + 
  theme(
    axis.text.x = element_text(angle = 30),
    axis.title = element_blank()
  )
models_to_plot_compas <- 
  tribble(~model_type, ~sculpt, ~mod_lab,
          "xgb", "orig.", "XGB",
          "xgb_1_order", "orig.", "XGB (Direct 1st order)",
          "xgb", "polished", "XGB (Polished)",
          "logistic", "orig.", "Logistic",
          "log_ridge", "orig.", "Ridge",
          "log_lasso", "orig.", "Lasso",
          "log_elastic", "orig.", "Elastic Net") %>% 
  mutate(mod_lab = fct_inorder(mod_lab))
res_plot_compas %>% 
  inner_join(models_to_plot_compas, by = c("model_type", "sculpt")) %>%
  arrange(mod_lab) %>% 
  ggplot(aes(x = value, y = mod_lab, colour = eval)) +
  geom_path(aes(group = eval), alpha = 0.5) + 
  geom_point(size = 2) + 
  facet_grid(cols = vars(metric)) +
  scale_y_discrete(limits = rev(levels(models_to_plot_compas$mod_lab))) +
  labs( 
    title = "Performance comparison with linear models",
    colour = "Evaluation"
  ) + 
  theme(
    axis.text.x = element_text(angle = 30),
    axis.title = element_blank()
  )
```
## Bike
```{r}
res_plot_bike %>% 
  filter(model_type == "xgb") %>%
  ggplot(aes(x = value, y = sculpt, colour = eval)) +
  geom_path(aes(group = eval), alpha = 0.5) + 
  geom_point(size = 2) + 
  facet_grid(cols = vars(metric)) +
  scale_y_discrete(limits = rev(levels(res_hd$sculpt))) +
  labs( 
    title = "XGB sculpted model performance",
    colour = "Evaluation"
  ) + 
  theme(
    axis.text.x = element_text(angle = 30),
    axis.title = element_blank()
  )
models_to_plot_bike <- 
  tribble(~model_type, ~sculpt, ~mod_lab,
          "xgb", "orig.", "XGB",
          "xgb_1_order", "orig.", "XGB (Direct 1st order)",
          "xgb", "polished", "XGB (Polished)",
          "lm_ridge", "orig.", "Ridge",
          "lm_lasso", "orig.", "Lasso",
          "lm_elastic", "orig.", "Elastic Net") %>% 
  mutate(mod_lab = fct_inorder(mod_lab))
res_plot_bike %>% 
  inner_join(models_to_plot_bike, by = c("model_type", "sculpt")) %>%
  arrange(mod_lab) %>% 
  ggplot(aes(x = value, y = mod_lab, colour = eval)) +
  geom_path(aes(group = eval), alpha = 0.5) + 
  geom_point(size = 2) + 
  facet_grid(cols = vars(metric)) +
  scale_y_discrete(limits = rev(levels(models_to_plot_bike$mod_lab))) +
  labs( 
    title = "Performance comparison with linear models",
    colour = "Evaluation"
  ) + 
  theme(
    axis.text.x = element_text(angle = 30),
    axis.title = element_blank()
  )
```
:::
# Table
## Resampling
```{r}
res_resample %>% 
  select(-model) %>% 
  mutate(across(where(is.numeric), \(x) sprintf(x, fmt = '%.4f'))) %>%
  mutate(across(where(is.character), as.factor)) %>%
  DT::datatable(filter = "top")
```
## Holdout
```{r}
res_hd %>% 
  select(-model) %>% 
  mutate(across(where(is.numeric), \(x) sprintf(x, fmt = '%.4f'))) %>%
  mutate(across(where(is.character), as.factor)) %>%
  DT::datatable(filter = "top")
```
```{r}
#| include: false
# Output for the slide
res_compas_summary <- 
  bind_rows(res_hd %>% mutate(eval = "holdout"), 
            res_resample %>% mutate(eval = "resample")) %>% 
  filter(dataset == "compas") %>% 
  filter(tune_bayes == plot_tune_bayes) %>% 
  mutate(across(where(is.numeric), \(x) sprintf(x, fmt = '%.3f')))
res_compas_summary %>% 
  filter(model_type == "xgb") %>% 
  select(sculpt, R2C1, DI, MI, eval) %>% 
  arrange(desc(eval), sculpt) %>% 
  pivot_wider(names_from = eval, values_from = c(R2C1, DI, MI),
              names_vary = 'slowest',
              names_glue = "{eval}_{.value}")
res_compas_summary %>% 
  filter(model_type == "xgb") %>% 
  select(sculpt, R2_ll, DI_ll, MI_ll, eval) %>% 
  arrange(desc(eval), sculpt) %>% 
  pivot_wider(names_from = eval, values_from = c(R2_ll, DI_ll, MI_ll),
              names_vary = 'slowest',
              names_glue = "{eval}_{.value}")
res_compas_summary %>% 
  inner_join(models_to_plot_compas, by = c("model_type", "sculpt")) %>%
  select(mod_lab, R2C1, DI, MI, eval) %>% 
  arrange(desc(eval), mod_lab) %>% 
  pivot_wider(names_from = eval, values_from = c(R2C1, DI, MI),
              names_vary = 'slowest',
              names_glue = "{eval}_{.value}")
res_compas_summary %>% 
  inner_join(models_to_plot_compas, by = c("model_type", "sculpt")) %>%
  select(mod_lab, R2_ll, DI_ll, MI_ll, eval) %>% 
  arrange(desc(eval), mod_lab) %>% 
  pivot_wider(names_from = eval, values_from = c(R2_ll, DI_ll, MI_ll),
              names_vary = 'slowest',
              names_glue = "{eval}_{.value}")
```
# Session info
```{r}
devtools::session_info()
```