8  Model diagnostics & comparison

This page showcase the model diagnosis and comparison for the Emax model

Show the code
library(tidyverse)
library(BayesERtools)
library(posterior)
library(tidybayes)
library(bayesplot)
library(loo)
library(here)
library(gt)

theme_set(theme_bw(base_size = 12))

8.1 Load data

Show the code
d_example_emax_nocov <- read_csv(here("data", "d_example_emax_nocov.csv"))

8.2 Fit models

Show the code
set.seed(1234)

ermod_sigemax <- dev_ermod_emax(
  data = d_example_emax_nocov,
  var_resp = "Y",
  var_exposure = "Conc",
  gamma_fix = NULL
)

ermod_sigemax

── Emax model ──────────────────────────────────────────────────────────────────
 Use `plot_er()` to visualize ER curve

── Developed model ──

---- Emax model fit with rstanemax ----
       mean se_mean   sd  2.5%   25%   50%   75% 97.5%   n_eff Rhat
emax  15.92    0.24 5.49  8.67 11.64 14.71 19.07 29.38  545.77 1.01
e0     3.13    0.09 2.00 -2.15  2.25  3.58  4.54  5.62  550.23 1.00
ec50  24.94    0.30 8.63 14.98 19.47 22.51 28.06 48.31  843.12 1.00
gamma  1.77    0.03 0.77  0.75  1.22  1.62  2.19  3.54  664.61 1.00
sigma  0.86    0.00 0.16  0.62  0.75  0.84  0.95  1.24 1348.22 1.00
* Use `extract_stanfit()` function to extract raw stanfit object
* Use `extract_param()` function to extract posterior draws of key parameters
* Use `plot()` function to visualize model fit
* Use `posterior_predict()` or `posterior_predict_quantile()` function to get
  raw predictions or make predictions on new data
* Use `extract_obs_mod_frame()` function to extract raw data 
  in a processed format (useful for plotting)

Another model without sigmoidal component; will be used when we do model comparison.

Show the code
set.seed(1234)

ermod_emax <- dev_ermod_emax(
  data = d_example_emax_nocov,
  var_resp = "Y",
  var_exposure = "Conc",
  gamma_fix = 1
)

ermod_emax

── Emax model ──────────────────────────────────────────────────────────────────
 Use `plot_er()` to visualize ER curve

── Developed model ──

---- Emax model fit with rstanemax ----
       mean se_mean    sd  2.5%   25%   50%   75% 97.5%   n_eff Rhat
emax  22.52    0.08  2.46 18.73 20.84 22.17 23.81 28.09  976.89 1.00
e0     0.98    0.04  1.36 -2.23  0.32  1.18  1.91  2.98  912.27 1.00
ec50  31.28    0.39 11.65 13.08 22.74 29.70 38.10 57.39  875.72 1.00
gamma  1.00     NaN  0.00  1.00  1.00  1.00  1.00  1.00     NaN  NaN
sigma  0.89    0.00  0.17  0.63  0.77  0.86  0.98  1.28 1216.16 1.01
* Use `extract_stanfit()` function to extract raw stanfit object
* Use `extract_param()` function to extract posterior draws of key parameters
* Use `plot()` function to visualize model fit
* Use `posterior_predict()` or `posterior_predict_quantile()` function to get
  raw predictions or make predictions on new data
* Use `extract_obs_mod_frame()` function to extract raw data 
  in a processed format (useful for plotting)

8.3 Parameter summary table

Show the code
d_draws_sigemax_summary <-
  summarize_draws(ermod_sigemax)

d_draws_sigemax_summary |>
  gt() |>
  fmt_number(decimals = 2)
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
ec50 24.94 22.51 8.63 5.65 16.17 42.09 1.00 1,111.48 1,157.42
sigma 0.86 0.84 0.16 0.15 0.64 1.15 1.00 1,434.70 1,777.17
gamma 1.77 1.62 0.77 0.68 0.84 3.12 1.00 593.05 951.82
e0 3.13 3.58 2.00 1.62 −0.84 5.38 1.00 675.84 809.63
emax 15.92 14.71 5.49 5.13 9.35 26.51 1.00 583.13 809.81

Here is the example of highest-density continuous interval (HDCI) for the median of ED50. See here for more details.

Show the code
# HDCI of median.ED50
as_draws_df(ermod_sigemax) |>
  tidybayes::spread_rvars(ec50) |>
  tidybayes::median_hdci()
# A tibble: 1 × 6
   ec50 .lower .upper .width .point .interval
               
1  22.5   13.3   43.3   0.95 median hdci     

8.4 Fitted values

Fitted values without residual errors (i.e. PRED in NONMEM term) can be extracted with sim_er() function. .epred is the expected value prediction. See ?sim_er for detail.

Show the code
sim_er(ermod_sigemax) |> head()
# A tibble: 6 × 7
# Groups:   Conc, Y, .row [1]
   Conc     Y  .row .draw    .epred .prediction  .linpred
              
1  47.5  15.1     1     1      14.5        14.7      14.5
2  47.5  15.1     1     2      14.6        16.8      14.6
3  47.5  15.1     1     3      14.7        13.5      14.7
4  47.5  15.1     1     4      15.0        15.2      15.0
5  47.5  15.1     1     5      14.4        13.9      14.4
6  47.5  15.1     1     6      14.5        14.7      14.5

You can specify output_type = "median_qi" to get median and quantile intervals of the prediction.

Show the code
ersim_sigemax_med_qi <-
  sim_er(ermod_sigemax, output_type = "median_qi")

ersim_sigemax_med_qi |>
  arrange(.row) |>
  head() |>
  gt(rownames_to_stub = TRUE) |>
  fmt_number(decimals = 2, columns = -.row)
Conc Y .row .epred .epred.lower .epred.upper .prediction .prediction.lower .prediction.upper .linpred .linpred.lower .linpred.upper .width .point .interval
1 47.51 15.14 1 14.56 13.47 15.61 14.54 12.55 16.57 14.56 13.47 15.61 0.95 median qi
2 12.44 7.26 2 7.50 6.81 8.11 7.49 5.62 9.30 7.50 6.81 8.11 0.95 median qi
3 14.90 8.78 3 8.40 7.69 9.02 8.36 6.53 10.20 8.40 7.69 9.02 0.95 median qi
4 13.62 7.05 4 7.94 7.24 8.56 7.93 6.10 9.83 7.94 7.24 8.56 0.95 median qi
5 29.16 10.82 5 12.22 11.62 12.87 12.20 10.28 14.04 12.22 11.62 12.87 0.95 median qi
6 45.16 13.27 6 14.36 13.36 15.27 14.34 12.36 16.29 14.36 13.36 15.27 0.95 median qi

8.5 Diagnostic plots

We use the bayesplot package (Cheat sheet) to visualize the model fit.

8.5.1 Convergence

Good fit results in:

  • Parameter distributions from MCMC chains should overlap
  • Trace plots should not show any trend
  • Rhat close to 1 (e.g. < 1.1)
Show the code
d_draws_sigemax <- as_draws_df(ermod_sigemax)
mcmc_dens_overlay(d_draws_sigemax)
mcmc_trace(d_draws_sigemax)
mcmc_rhat(rhat(ermod_sigemax$mod$stanfit))
Figure 8.1
Figure 8.2
Figure 8.3

8.5.2 Parameter estimates distribution

Show the code
mcmc_hist(d_draws_sigemax)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Figure 8.4
Show the code
mcmc_pairs(d_draws_sigemax,
  off_diag_args = list(size = 0.5, alpha = 0.25))
Figure 8.5

8.5.3 GOF plots

Show the code
plot_er(ermod_sigemax, show_orig_data = TRUE)
Figure 8.6
Show the code
# Preparation for diagnostic plots
ersim_sigemax_med_qi <- sim_er(ermod_sigemax, output_type = "median_qi")

ersim_sigemax_med_qi |>
  ggplot(aes(.epred, Y)) +
  geom_abline(linetype = 2, color = "grey") +
  geom_point() +
  geom_errorbarh(aes(xmin = .epred.lower, xmax = .epred.upper), height = 0) +
  labs(title = "Observed vs Predicted",
    x = "Predicted",
    y = "Observed",
    caption = "Symbol: median and 95% credible interval")
Figure 8.7
Show the code
ersim_sigemax_w_resid <-
  sim_er(ermod_sigemax) |>
  mutate(.residual = Y - .epred) # Add residuals for plotting
ersim_sigemax_w_resid_med_qi <- median_qi(ersim_sigemax_w_resid)

ersim_sigemax_w_resid_med_qi |>
  ggplot(aes(x = .epred, y = .residual)) +
  xlab("Predicted (linear)") +
  ylab("Residuals") +
  geom_point() +
  geom_errorbar(aes(ymin = .residual.lower, ymax = .residual.upper), width = 0) +
  geom_hline(aes(yintercept = 2), lty = 2, colour = "grey70") +
  geom_hline(aes(yintercept = -2), lty = 2, colour = "grey70")
Figure 8.8
Show the code
ersim_sigemax_w_resid_med_qi |>
  ggplot(aes(sample = .residual)) +
  geom_qq() +
  geom_qq_line(colour = "steelblue", lty = 2, alpha = 0.4) +
  coord_equal() +
  xlab("Theoretical") +
  ylab("Sample")
Figure 8.9

8.6 Model comparison

You can perform model comparison based on expected log pointwise predictive density (ELPD). ELPD is the Bayesian leave-one-out estimate (see ?loo-glossary).

Higher ELPD is better, therefore Emax model with γ fixed to be 1 appears better. However, elpd_diff is tiny and smaller than se_diff (see here), therefore we can consider the difference to be not meaningful.

Show the code
loo_sigemax <- loo(ermod_sigemax)
loo_emax <- loo(ermod_emax)

loo_compare(list(sigemax = loo_sigemax, emax = loo_emax))
        elpd_diff se_diff
emax     0.0       0.0   
sigemax -0.3       0.8