Example for a Sick-Sicker-Dead model, Quasi-Random Sobol Sequence vs. Purely Random
Javier Sanchez Alvarez
December 18, 2024
example_ssd_sobol.Rmd
Main options
library(WARDEN)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(kableExtra)
#>
#> Attaching package: 'kableExtra'
#> The following object is masked from 'package:dplyr':
#>
#> group_rows
library(purrr)
if(!require(randtoolbox)){
install.packages("randtoolbox")
library(randtoolbox)
}
#> Loading required package: randtoolbox
#> Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
#> logical.return = TRUE, : there is no package called 'randtoolbox'
#> Installing package into '/home/runner/work/_temp/Library'
#> (as 'lib' is unspecified)
#> also installing the dependency 'rngWELL'
#> Loading required package: rngWELL
#> This is randtoolbox. For an overview, type 'help("randtoolbox")'.
Introduction
This document runs a discrete event simulation model in the context of a late oncology model to show how using quasi-random numbers can radically change convergence speed by using quasi-random sobol sequences instead of purely random numbers.
Sobol sequences are a deterministic way of generating numbers (between 0 and 1) in a way that fills the space very evenly. Several methods can be used to randomize the generation of these sequences, and to create multiple dimensions when we want to have multiple variables using these sequences. Sobol sequences can fill out the space more evenly than a random uniform distribution, so they can make a model converge faster.
This vignette explores how one can use sobol sequences to accelerate convergence and reduce the number of profiles needed.
n_points <- 500
sobol_seq <- randtoolbox::sobol(n = n_points, dim = 2)
random_points <- matrix(runif(n_points * 2), ncol = 2)
sobol_df <- data.frame(x = sobol_seq[, 1], y = sobol_seq[, 2], Type = "Sobol")
random_df <- data.frame(x = random_points[, 1], y = random_points[, 2], Type = "Random Uniform")
combined_df <- rbind(sobol_df, random_df)
ggplot(combined_df, aes(x = x, y = y, color = Type)) +
geom_point(alpha = 0.7, size = 1) +
facet_wrap(~Type) +
theme_bw() +
labs(
title = "Comparison of Sobol Sequence vs Random Uniform Sampling"
) +
theme(legend.position = "none")
When running a DES, it’s important to consider speed. Simulation based models can be computationally expensive, which means that using efficient coding can have a substantial impact on performance.
General inputs with delayed execution
When we generate inputs, we create two versions, one with random numbers generated from a uniform variable, and another generated from a sobol sequence.
randtoolbox::sobol(1,2, init = TRUE) #initialize
#> [,1] [,2]
#> [1,] 0.5 0.5
N <- 500
sims <- 5
common_all_inputs <-add_item(
util.sick = 0.8,
util.sicker = 0.5,
cost.sick = 3000,
cost.sicker = 7000,
cost.int = 1000,
coef_noint = log(0.2),
HR_int = 0.8,
drc = 0.035,
drq = 0.035
)
common_all_inputs_unif <- common_all_inputs %>%
add_item(random_n = runif(N),
random_n_death = runif(N)) #we draw N random uniform samples
common_all_inputs_sobol <- common_all_inputs %>%
add_item(random_sobol = (randtoolbox::sobol(N,2, init = TRUE) + matrix(rep(runif(2), each = N), nrow = N, byrow = TRUE)) %% 1,
random_n = random_sobol[,1],
random_n_death = random_sobol[,2]) #we draw n sobol sequences, we need to do a small trick as scrambling and seeds are temporarely deactivated within the randtoolbox package
common_pt_inputs <- add_item(death= max(0.0000001,qnorm(random_n_death[i], mean=12, sd=3))) #use random number for death to draw from a normal distribution
unique_pt_inputs <- add_item(fl.sick = 1,
q_default = util.sick,
c_default = cost.sick + if(arm=="int"){cost.int}else{0})
Events
Add Reaction to Those Events
evt_react_list <-
add_reactevt(name_evt = "sick",
input = {}) %>%
add_reactevt(name_evt = "sicker",
input = {
modify_item(list(q_default = util.sicker,
c_default = cost.sicker + if(arm=="int"){cost.int}else{0},
fl.sick = 0))
}) %>%
add_reactevt(name_evt = "death",
input = {
modify_item(list(q_default = 0,
c_default = 0,
curtime = Inf))
})
Model
Model Execution
We run both versions of the model for a few simulations to showcase the different speed of convergence.
results_unif <- run_sim(
npats=N,
n_sim=sims,
psa_bool = FALSE,
arm_list = c("int", "noint"),
common_all_inputs = common_all_inputs_unif,
common_pt_inputs = common_pt_inputs,
unique_pt_inputs = unique_pt_inputs,
init_event_list = init_event_list,
evt_react_list = evt_react_list,
util_ongoing_list = util_ongoing,
cost_ongoing_list = cost_ongoing,
ipd = 2,
seed = 1
)
#> Analysis number: 1
#> Simulation number: 1
#> Patient-arm data aggregated across events by selecting the last value for input_out items.
#> Time to run simulation 1: 0.45s
#> Simulation number: 2
#> Time to run simulation 2: 0.44s
#> Simulation number: 3
#> Time to run simulation 3: 0.6s
#> Simulation number: 4
#> Time to run simulation 4: 0.38s
#> Simulation number: 5
#> Time to run simulation 5: 0.4s
#> Time to run analysis 1: 2.27s
#> Total time to run: 2.28s
results_sobol <- run_sim(
npats=N,
n_sim=sims,
psa_bool = FALSE,
arm_list = c("int", "noint"),
common_all_inputs = common_all_inputs_sobol,
common_pt_inputs = common_pt_inputs,
unique_pt_inputs = unique_pt_inputs,
init_event_list = init_event_list,
evt_react_list = evt_react_list,
util_ongoing_list = util_ongoing,
cost_ongoing_list = cost_ongoing,
ipd = 2,
seed = 1
)
#> Analysis number: 1
#> Simulation number: 1
#> Patient-arm data aggregated across events by selecting the last value for input_out items.
#> Time to run simulation 1: 0.4s
#> Simulation number: 2
#> Time to run simulation 2: 0.39s
#> Simulation number: 3
#> Time to run simulation 3: 0.4s
#> Simulation number: 4
#> Time to run simulation 4: 0.4s
#> Simulation number: 5
#> Time to run simulation 5: 0.4s
#> Time to run analysis 1: 1.99s
#> Total time to run: 1.99s
Post-processing of Model Outputs
Summary of Results
Once the models have been run, we merge the data and generate the cumulative ICER to see how fast they converge to the final estimated value.
It can clearly be seen that the random uniform approach makes the model converge much more slowly than the sobol sequences.
summary_results_sim(results_unif[[1]])
#> int noint
#> costs 59,311 (57,800; 61,194) 52,090 (50,768; 53,725)
#> dcosts 0 (0; 0) 7,220 (7,033; 7,469)
#> lys 9.73 (9.59; 9.91) 9.73 (9.59; 9.91)
#> dlys 0 (0; 0) 0 (0; 0)
#> qalys 6.26 (6.19; 6.39) 6.07 (6; 6.2)
#> dqalys 0 (0; 0) 0.188 (0.178; 0.197)
#> ICER NaN (NA; NA) Inf (Inf; Inf)
#> ICUR NaN (NA; NA) 38,402 (36,651; 42,023)
#> INMB NaN (NA; NA) 2,201 (1,419; 2,604)
#> costs_undisc 74,738 (72,422; 77,338) 65,881 (63,864; 68,132)
#> dcosts_undisc 0 (0; 0) 8,857 (8,558; 9,206)
#> lys_undisc 12 (11.8; 12.3) 12 (11.8; 12.3)
#> dlys_undisc 0 (0; 0) 0 (0; 0)
#> qalys_undisc 7.6 (7.49; 7.8) 7.37 (7.26; 7.55)
#> dqalys_undisc 0 (0; 0) 0.236 (0.223; 0.249)
#> ICER_undisc NaN (NA; NA) Inf (Inf; Inf)
#> ICUR_undisc NaN (NA; NA) 37,578 (35,380; 41,293)
#> INMB_undisc NaN (NA; NA) 2,956 (1,942; 3,550)
#> c_default 59,311 (57,800; 61,194) 52,090 (50,768; 53,725)
#> dc_default 0 (0; 0) 7,220 (7,033; 7,469)
#> c_default_undisc 74,738 (72,422; 77,338) 65,881 (63,864; 68,132)
#> dc_default_undisc 0 (0; 0) 8,857 (8,558; 9,206)
#> q_default 6.26 (6.19; 6.39) 6.07 (6; 6.2)
#> dq_default 0 (0; 0) 0.188 (0.178; 0.197)
#> q_default_undisc 7.6 (7.49; 7.8) 7.37 (7.26; 7.55)
#> dq_default_undisc 0 (0; 0) 0.236 (0.223; 0.249)
summary_results_sim(results_sobol[[1]])
#> int noint
#> costs 59,690 (59,529; 59,855) 52,398 (52,243; 52,536)
#> dcosts 0 (0; 0) 7,292 (7,257; 7,319)
#> lys 9.74 (9.72; 9.77) 9.74 (9.72; 9.77)
#> dlys 0 (0; 0) 0 (0; 0)
#> qalys 6.24 (6.23; 6.26) 6.06 (6.04; 6.08)
#> dqalys 0 (0; 0) 0.184 (0.183; 0.185)
#> ICER NaN (NA; NA) Inf (Inf; Inf)
#> ICUR NaN (NA; NA) 39,650 (39,204; 39,964)
#> INMB NaN (NA; NA) 1,904 (1,835; 1,998)
#> costs_undisc 75,219 (74,967; 75,467) 66,248 (66,004; 66,457)
#> dcosts_undisc 0 (0; 0) 8,971 (8,920; 9,010)
#> lys_undisc 12 (12; 12.1) 12 (12; 12.1)
#> dlys_undisc 0 (0; 0) 0 (0; 0)
#> qalys_undisc 7.58 (7.56; 7.61) 7.36 (7.33; 7.38)
#> dqalys_undisc 0 (0; 0) 0.229 (0.227; 0.231)
#> ICER_undisc NaN (NA; NA) Inf (Inf; Inf)
#> ICUR_undisc NaN (NA; NA) 39,184 (38,675; 39,524)
#> INMB_undisc NaN (NA; NA) 2,477 (2,382; 2,612)
#> c_default 59,690 (59,529; 59,855) 52,398 (52,243; 52,536)
#> dc_default 0 (0; 0) 7,292 (7,257; 7,319)
#> c_default_undisc 75,219 (74,967; 75,467) 66,248 (66,004; 66,457)
#> dc_default_undisc 0 (0; 0) 8,971 (8,920; 9,010)
#> q_default 6.24 (6.23; 6.26) 6.06 (6.04; 6.08)
#> dq_default 0 (0; 0) 0.184 (0.183; 0.185)
#> q_default_undisc 7.58 (7.56; 7.61) 7.36 (7.33; 7.38)
#> dq_default_undisc 0 (0; 0) 0.229 (0.227; 0.231)
det_ipd_unif <- bind_rows(map(results_unif[[1]], "merged_df")) %>% mutate(type = "unif")
det_ipd_sobol <- bind_rows(map(results_sobol[[1]], "merged_df")) %>% mutate(type = "sobol")
merged_ipd <- rbind(det_ipd_unif,det_ipd_sobol) %>%
group_by(arm, type, simulation) %>%
mutate(cumul_total_qalys = cumsum(total_qalys)/pat_id,
cumul_total_costs = cumsum(total_costs)/pat_id) %>%
transmute(type, pat_id, arm, simulation, cumul_total_qalys, cumul_total_costs) %>%
tidyr::pivot_wider(names_from = arm, values_from = c(cumul_total_qalys,cumul_total_costs)) %>%
mutate(inc_costs = cumul_total_costs_int - cumul_total_costs_noint,
inc_qalys = cumul_total_qalys_int - cumul_total_qalys_noint,
ICER = inc_costs/ inc_qalys)
ggplot(merged_ipd, aes(x=pat_id,y=ICER, colour = type, fill = as.factor(simulation)))+
geom_line() +
theme_bw() +
ylim(30000,45000)
#> Warning: Removed 49 rows containing missing values or values outside the scale range
#> (`geom_line()`).
Model with PSA
Model Execution
#Load some data
list_par <- list(parameter_name = list("util.sick","util.sicker","cost.sick","cost.sicker","cost.int","coef_noint","HR_int"),
base_value = list(0.8,0.5,3000,7000,1000,log(0.2),0.8),
DSA_min = list(0.6,0.3,1000,5000,800,log(0.1),0.5),
DSA_max = list(0.9,0.7,5000,9000,2000,log(0.4),0.9),
PSA_dist = list("qnorm","qbeta_mse","qgamma_mse","qgamma_mse","qgamma_mse","qnorm","qlnorm"),
a=list(0.8,0.5,3000,7000,1000,log(0.2),log(0.8)),
b=lapply(list(0.8,0.5,3000,7000,1000,log(0.2),log(0.8)), function(x) abs(x/10)),
scenario_1=list(0.6,0.3,1000,5000,800,log(0.1),0.5),
scenario_2=list(0.9,0.7,5000,9000,2000,log(0.4),0.9)
)
sensitivity_inputs <-add_item(
indicators = if(sensitivity_bool){ create_indicators(sens,n_sensitivity*length(sensitivity_names),rep(1,length(list_par[[1]])))}else{
rep(1,length(list_par[[1]]))} #vector of indicators, value 0 everywhere except at sens, where it takes value 1 (for dsa_min and dsa_max, if not sensitivity analysis, then we activate all of them, i.e., in a PSA)
)
common_all_inputs <- add_item(
random_sobol_psa = (randtoolbox::sobol(1,7, init = TRUE) + matrix(rep(runif(7), each = 1), nrow = 1, byrow = TRUE)) %% 1
) %>%
add_item(
pick_val_v(base = list_par[["base_value"]],
psa = pick_psa(list_par[["PSA_dist"]],random_sobol_psa,list_par[["a"]],list_par[["b"]]),
sens = list_par[[sens_name_used]],
psa_ind = psa_bool,
sens_ind = sensitivity_bool,
indicator = indicators,
names_out = list_par[["parameter_name"]]
)
)
common_all_inputs_unif <- common_all_inputs %>%
add_item(random_n = runif(N),
random_n_death = runif(N)) #we draw N random uniform samples
common_all_inputs_sobol <- common_all_inputs %>%
add_item(random_sobol = (randtoolbox::sobol(N,2, init = TRUE) + matrix(rep(runif(2), each = N), nrow = N, byrow = TRUE)) %% 1,
random_n = random_sobol[,1],
random_n_death = random_sobol[,2]) #we draw n sobol sequences, we need to do a small trick as scrambling and seeds are temporarely deactivated within the randtoolbox package
results_unif_psa <- run_sim(
npats=N,
n_sim=sims,
psa_bool = TRUE,
arm_list = c("int", "noint"),
sensitivity_inputs = sensitivity_inputs,
common_all_inputs = common_all_inputs_unif,
common_pt_inputs = common_pt_inputs,
unique_pt_inputs = unique_pt_inputs,
init_event_list = init_event_list,
evt_react_list = evt_react_list,
util_ongoing_list = util_ongoing,
cost_ongoing_list = cost_ongoing,
ipd = 2,
seed = 1
)
#> Analysis number: 1
#> Simulation number: 1
#> Patient-arm data aggregated across events by selecting the last value for input_out items.
#> Time to run simulation 1: 0.4s
#> Simulation number: 2
#> Time to run simulation 2: 0.41s
#> Simulation number: 3
#> Time to run simulation 3: 0.46s
#> Simulation number: 4
#> Time to run simulation 4: 0.43s
#> Simulation number: 5
#> Time to run simulation 5: 0.43s
#> Time to run analysis 1: 2.13s
#> Total time to run: 2.13s
results_sobol_psa <- run_sim(
npats=N,
n_sim=sims,
psa_bool = TRUE,
arm_list = c("int", "noint"),
sensitivity_inputs = sensitivity_inputs,
common_all_inputs = common_all_inputs_sobol,
common_pt_inputs = common_pt_inputs,
unique_pt_inputs = unique_pt_inputs,
init_event_list = init_event_list,
evt_react_list = evt_react_list,
util_ongoing_list = util_ongoing,
cost_ongoing_list = cost_ongoing,
ipd = 2,
seed = 1
)
#> Analysis number: 1
#> Simulation number: 1
#> Patient-arm data aggregated across events by selecting the last value for input_out items.
#> Time to run simulation 1: 0.43s
#> Simulation number: 2
#> Time to run simulation 2: 0.42s
#> Simulation number: 3
#> Time to run simulation 3: 0.43s
#> Simulation number: 4
#> Time to run simulation 4: 0.43s
#> Simulation number: 5
#> Time to run simulation 5: 0.42s
#> Time to run analysis 1: 2.14s
#> Total time to run: 2.14s
Post-processing of PSA Outputs
Summary of Results
Once the models have been run, we merge the data and generate the cumulative ICER to see how fast they converge to the final estimated value.
It can clearly be seen that the random uniform approach makes the model converge much more slowly than the sobol sequences.
summary_results_sim(results_unif_psa[[1]])
#> int noint
#> costs 60,135 (58,403; 63,016) 53,027 (51,497; 55,211)
#> dcosts 0 (0; 0) 7,108 (5,994; 7,805)
#> lys 10 (9.85; 10.2) 10 (9.85; 10.2)
#> dlys 0 (0; 0) 0 (0; 0)
#> qalys 6.33 (5.63; 6.78) 6.12 (5.53; 6.53)
#> dqalys 0 (0; 0) 0.21 (0.105; 0.25)
#> ICER NaN (NA; NA) Inf (Inf; Inf)
#> ICUR NaN (NA; NA) 38,930 (24,578; 70,228)
#> INMB NaN (NA; NA) 3,388 (-1,708; 6,245)
#> costs_undisc 73,464 (71,678; 76,647) 64,968 (62,991; 67,367)
#> dcosts_undisc 0 (0; 0) 8,495 (7,170; 9,280)
#> lys_undisc 12 (11.8; 12.3) 12 (11.8; 12.3)
#> dlys_undisc 0 (0; 0) 0 (0; 0)
#> qalys_undisc 7.48 (6.7; 7.99) 7.22 (6.57; 7.69)
#> dqalys_undisc 0 (0; 0) 0.254 (0.127; 0.302)
#> ICER_undisc NaN (NA; NA) Inf (Inf; Inf)
#> ICUR_undisc NaN (NA; NA) 38,554 (24,223; 69,703)
#> INMB_undisc NaN (NA; NA) 4,183 (-1,988; 7,695)
#> c_default 60,135 (58,403; 63,016) 53,027 (51,497; 55,211)
#> dc_default 0 (0; 0) 7,108 (5,994; 7,805)
#> c_default_undisc 73,464 (71,678; 76,647) 64,968 (62,991; 67,367)
#> dc_default_undisc 0 (0; 0) 8,495 (7,170; 9,280)
#> q_default 6.33 (5.63; 6.78) 6.12 (5.53; 6.53)
#> dq_default 0 (0; 0) 0.21 (0.105; 0.25)
#> q_default_undisc 7.48 (6.7; 7.99) 7.22 (6.57; 7.69)
#> dq_default_undisc 0 (0; 0) 0.254 (0.127; 0.302)
summary_results_sim(results_sobol_psa[[1]])
#> int noint
#> costs 60,571 (58,122; 64,995) 53,370 (52,034; 56,982)
#> dcosts 0 (0; 0) 7,200 (6,049; 8,074)
#> lys 10 (9.99; 10.1) 10 (9.99; 10.1)
#> dlys 0 (0; 0) 0 (0; 0)
#> qalys 6.31 (5.66; 6.8) 6.1 (5.55; 6.56)
#> dqalys 0 (0; 0) 0.203 (0.105; 0.242)
#> ICER NaN (NA; NA) Inf (Inf; Inf)
#> ICUR NaN (NA; NA) 40,231 (26,808; 71,510)
#> INMB NaN (NA; NA) 2,970 (-1,776; 5,273)
#> costs_undisc 73,994 (71,014; 79,434) 65,368 (63,730; 69,820)
#> dcosts_undisc 0 (0; 0) 8,626 (7,239; 9,685)
#> lys_undisc 12 (12; 12.1) 12 (12; 12.1)
#> dlys_undisc 0 (0; 0) 0 (0; 0)
#> qalys_undisc 7.45 (6.73; 8.03) 7.21 (6.6; 7.74)
#> dqalys_undisc 0 (0; 0) 0.244 (0.126; 0.289)
#> ICER_undisc NaN (NA; NA) Inf (Inf; Inf)
#> ICUR_undisc NaN (NA; NA) 40,091 (26,705; 71,186)
#> INMB_undisc NaN (NA; NA) 3,590 (-2,091; 6,365)
#> c_default 60,571 (58,122; 64,995) 53,370 (52,034; 56,982)
#> dc_default 0 (0; 0) 7,200 (6,049; 8,074)
#> c_default_undisc 73,994 (71,014; 79,434) 65,368 (63,730; 69,820)
#> dc_default_undisc 0 (0; 0) 8,626 (7,239; 9,685)
#> q_default 6.31 (5.66; 6.8) 6.1 (5.55; 6.56)
#> dq_default 0 (0; 0) 0.203 (0.105; 0.242)
#> q_default_undisc 7.45 (6.73; 8.03) 7.21 (6.6; 7.74)
#> dq_default_undisc 0 (0; 0) 0.244 (0.126; 0.289)
psa_ipd_unif <- bind_rows(map(results_unif_psa[[1]], "merged_df")) %>% mutate(type = "unif")
psa_ipd_sobol <- bind_rows(map(results_sobol_psa[[1]], "merged_df")) %>% mutate(type = "sobol")
merged_ipd_psa <- rbind(psa_ipd_unif,psa_ipd_sobol) %>%
group_by(arm, type, simulation) %>%
mutate(cumul_total_qalys = cumsum(total_qalys)/pat_id,
cumul_total_costs = cumsum(total_costs)/pat_id) %>%
transmute(type, pat_id, arm, simulation, cumul_total_qalys, cumul_total_costs) %>%
tidyr::pivot_wider(names_from = arm, values_from = c(cumul_total_qalys,cumul_total_costs)) %>%
mutate(inc_costs = cumul_total_costs_int - cumul_total_costs_noint,
inc_qalys = cumul_total_qalys_int - cumul_total_qalys_noint,
ICER = inc_costs/ inc_qalys)
ggplot(merged_ipd_psa, aes(x=pat_id,y=ICER, colour = type, fill = as.factor(simulation)))+
geom_line() +
theme_bw() +
ylim(20000,80000)
#> Warning: Removed 7 rows containing missing values or values outside the scale range
#> (`geom_line()`).