
Simulation Workflow for OLE Phase
Lei Shi
2026-05-06
Source:vignettes/OLE_simulation_workflow.Rmd
OLE_simulation_workflow.RmdOLE study
This vignette demonstrates Monte Carlo simulation for evaluating the difference-in-differences (DID) estimators proposed in Zhou et al. (2024) for the open-label extension (OLE) phase.
1. Simulate a dataset for OLE study
# Initialize an empty data list
set.seed(2023)
data_matrix_list <- list()
ntrial <- 3
# Specify the significance level alpha
alpha <- 0.05
# Specify the true effect size at the end of the OLE study
true_effect_long <- 0
# Specify column names
covariates_col_name <- c("x1", "x2", "x3", "x4", "x5")
outcome_col_name <- c("y1", "y2", "y3", "y4")
treatment_col_name <- "A"
trial_status_col_name <- "S"
# Sequentially adding in datasets
for (trial_iter in 1:ntrial) {
# simulate 300 sample
normal <- copula::normalCopula(param = c(0.8), dim = 4, dispstr = "ar1")
# ========== generate internal covariates =============
X_int <- simulate_X_copula(
n = 200,
p = 4,
cp = normal, # copula
margins = c("binom", "binom", "binom", "exp"), # specify marginal distributions
paramMargins = list(
list(size = 1, prob = 0.7), # specify parameters for marginals
list(size = 1, prob = 0.9),
list(size = 1, prob = 0.3),
list(rate = 1 / 10)
)
)
X_int$x4 <- round(X_int$x4) + 1
X_int$x5 <- 30 + 10 * X_int$x1 + (7) * X_int$x2 + (-6) * X_int$x3 + (-0.5) * X_int$x4 + rnorm(200, mean = 0, sd = 10)
varnames <- c("1", paste0("x", 1:5))
# ============ generate external covariates ==============
X_ext <- simulate_X_copula(
n = 100,
p = 4,
cp = normal, # copula
margins = c("binom", "binom", "binom", "exp"), # specify marginal distributions
paramMargins = list(
list(size = 1, prob = 0.7), # specify parameters for marginals
list(size = 1, prob = 0.9),
list(size = 1, prob = 0.3),
list(rate = 1 / 10)
)
)
X_ext$x4 <- round(X_ext$x4) + 1
X_ext$x5 <- 50 + 10 * X_ext$x1 + (2) * X_ext$x2 + (-1) * X_ext$x3 + (-0.3) * X_ext$x4 + rnorm(100, mean = 0, sd = 10)
varnames <- c("1", paste0("x", 1:5))
# ============ Specify outcome models ==============
model_form_x_t1 <- setNames(c(10.0, 0.05, -1.5, -1.0, -0.2, -0.1), varnames) # 1.5*A, sigma = 4.0
model_form_x_t2 <- setNames(c(6.0, 0.5, -0.5, -1.0, -0.3, -0.06), varnames) # 1.8*A, sigma = 4.0
model_form_x_t3 <- setNames(c(5.0, 1.9, 1.4, -1.3, -0.4, -0.15), varnames) # 1.6*A, sigma = 4.0
model_form_x_t4 <- setNames(c(1.2, 1.0, 2.0, -0.5, -0.4, -0.10), varnames) # 2.5*A, sigma = 5.0
outcome_model_specs <- list(
list(
effect = 0, model_form_x = model_form_x_t1, # from data: true_effect = 1.5
noise_mean = 0, noise_sd = 4
), # model form for the first time point, given by model_form_x_t1
list(
effect = 0, model_form_x = model_form_x_t2, # from data: true_effect = 1.8
noise_mean = 0, noise_sd = 4
), # model form for the second time point, given by model_form_x_t2
list(
effect = 0, model_form_x = model_form_x_t3, # from data: true_effect = 1.6
noise_mean = 0, noise_sd = 4
), # model form for the third time point, given by model_form_x_t3
list(
effect = true_effect_long, model_form_x = model_form_x_t4, # from data: true_effect = 2.5
noise_mean = 0, noise_sd = 4
) # model form for the fourth time point, given by model_form_x_t4
)
# =========== generate trial data ============
Data <- simulate_trial(X_int,
X_ext,
num_treated = 150,
OLE_flag = TRUE,
T_cross = 2,
outcome_model_specs
)
data_matrix_list[[trial_iter]] <- Data
}2 Bootstrap inference
# Specify a list of methods to be tested
bootstrap_obj <- setup_bootstrap(
replicates = 50,
bootstrap_CI_type = "perc"
)
model_form_mu <- c(
"y1 ~ x1 + x2 + x3 + x4 + x5",
"y2 ~ x1 + x2 + x3 + x4 + x5",
"y3 ~ x1 + x2 + x3 + x4 + x5",
"y4 ~ x1 + x2 + x3 + x4 + x5"
)
## IPW - optimal weight
method_IPW_DID <- setup_method_DID(
method_name = "IPW",
bootstrap_flag = TRUE,
bootstrap_obj = bootstrap_obj,
model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5",
model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5"
)
## AIPW - optimal weight
method_AIPW_DID <- setup_method_DID(
method_name = "AIPW",
bootstrap_flag = TRUE,
bootstrap_obj = bootstrap_obj,
model_form_piS = "S ~ x1 + x2 + x3 + x4 + x5",
model_form_piA = "A ~ x1 + x2 + x3 + x4 + x5",
model_form_mu0_ext = model_form_mu
)
## OR - optimal weight
method_OR_DID <- setup_method_DID(
method_name = "OR",
bootstrap_flag = TRUE,
bootstrap_obj = bootstrap_obj,
model_form_mu0_ext = model_form_mu,
model_form_mu0_rct = model_form_mu,
model_form_mu1_rct = model_form_mu
)
## method list
method_obj_list <- list(
method_IPW_DID,
method_AIPW_DID,
method_OR_DID
)
# create a simulation object for primary analysis
simulation_OLE_obj <- setup_simulation_OLE(
data_matrix_list = data_matrix_list, # two scenarios
trial_status_col_name = trial_status_col_name,
treatment_col_name = treatment_col_name,
outcome_col_name = outcome_col_name,
covariates_col_name = covariates_col_name,
method_obj_list = method_obj_list,
true_effect = true_effect_long,
T_cross = 2,
alpha = alpha,
method_description = c(
"IPW, DID",
"AIPW, DID",
"OR, DID"
)
)
simulation_report <- run_simulation(simulation_OLE_obj, quiet = FALSE)
#> Null: | method setting: 1 | data setting: 1
#> Null: | method setting: 1 | data setting: 2
#> Null: | method setting: 1 | data setting: 3
#> Null: | method setting: 2 | data setting: 1
#> Null: | method setting: 2 | data setting: 2
#> Null: | method setting: 2 | data setting: 3
#> Null: | method setting: 3 | data setting: 1
#> Null: | method setting: 3 | data setting: 2
#> Null: | method setting: 3 | data setting: 3
simulation_report # Type I error and Power
#> method_description bias variance mse coverage type_I_error
#> 1 IPW, DID 0.7608010 2.8208371 3.399655 1 0
#> 2 AIPW, DID 1.6168995 2.8917875 5.506151 1 0
#> 3 OR, DID 0.7888743 0.5824244 1.204747 1 0
# simulation hypo testThe above code generates a report for the simulation results.
References
- Zhou X, Pang H, Drake C, Burger HU, Zhu J (2024). “Estimating treatment effect in randomized trial after control to treatment crossover using external controls.” Journal of Biopharmaceutical Statistics. doi: 10.1080/10543406.2024.2444222.
- Shi L, Pang H, Chen C, Zhu J (2025). “rdborrow: an R package for causal inference incorporating external controls in randomized controlled trials with longitudinal outcomes.” Journal of Biopharmaceutical Statistics, 35(6), 1043-1066. doi: 10.1080/10543406.2025.2489283.