Storing and Analyzing Imputed Data with rbmiUtils

2025-05-20

1 Introduction

This vignette demonstrates how to:

This pattern enables reproducible workflows where imputation and analysis can be separated and revisited independently.

2 Statistical Context

This approach applies Rubin’s Rules for inference after multiple imputation:

We fit a model to each imputed dataset, dervive a response variable on the CHG score, extract marginal effects or other statistics of interest, and combine the results into a single inference using Rubin’s combining rules.


3 Step 1: Setup and Data Preparation

library(dplyr)
library(tidyr)
library(readr)
library(purrr)
library(rbmi)
library(beeca)
library(rbmiUtils)
set.seed(1974)
data("ADEFF")

ADEFF <- ADEFF %>%
  mutate(
    TRT = factor(TRT01P, levels = c("Placebo", "Drug A")),
    USUBJID = factor(USUBJID),
    AVISIT = factor(AVISIT)
  )

4 Step 2: Define Imputation Model

vars <- set_vars(
  subjid = "USUBJID",
  visit = "AVISIT",
  group = "TRT",
  outcome = "CHG",
  covariates = c("BASE", "STRATA", "REGION")
)
method <- method_bayes(
  n_samples = 100,
  control = control_bayes(warmup = 200, thin = 2)
)
dat <- ADEFF %>%
  select(USUBJID, STRATA, REGION, REGIONC, TRT, BASE, CHG, AVISIT)

draws_obj <- draws(data = dat, vars = vars, method = method)
#> 
#> SAMPLING FOR MODEL 'rbmi_mmrm' NOW (CHAIN 1).
#> Chain 1: 
#> Chain 1: Gradient evaluation took 0.000259 seconds
#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.59 seconds.
#> Chain 1: Adjust your expectations accordingly!
#> Chain 1: 
#> Chain 1: 
#> Chain 1: Iteration:   1 / 400 [  0%]  (Warmup)
#> Chain 1: Iteration:  40 / 400 [ 10%]  (Warmup)
#> Chain 1: Iteration:  80 / 400 [ 20%]  (Warmup)
#> Chain 1: Iteration: 120 / 400 [ 30%]  (Warmup)
#> Chain 1: Iteration: 160 / 400 [ 40%]  (Warmup)
#> Chain 1: Iteration: 200 / 400 [ 50%]  (Warmup)
#> Chain 1: Iteration: 201 / 400 [ 50%]  (Sampling)
#> Chain 1: Iteration: 240 / 400 [ 60%]  (Sampling)
#> Chain 1: Iteration: 280 / 400 [ 70%]  (Sampling)
#> Chain 1: Iteration: 320 / 400 [ 80%]  (Sampling)
#> Chain 1: Iteration: 360 / 400 [ 90%]  (Sampling)
#> Chain 1: Iteration: 400 / 400 [100%]  (Sampling)
#> Chain 1: 
#> Chain 1:  Elapsed Time: 0.322 seconds (Warm-up)
#> Chain 1:                0.244 seconds (Sampling)
#> Chain 1:                0.566 seconds (Total)
#> Chain 1:

impute_obj <- impute(draws_obj, references = c("Placebo" = "Placebo", "Drug A" = "Placebo"))

ADMI <- get_imputed_data(impute_obj)

5 Step 3: Add Responder Variables

ADMI <- ADMI %>%
  mutate(
    CRIT1FLN = ifelse(CHG > 3, 1, 0),
    CRIT1FL = ifelse(CRIT1FLN == 1, "Y", "N"),
    CRIT = "CHG > 3"
  )

6 Step 4: Continuous Endpoint Analysis (CHG)

ana_obj_ancova <- analyse_mi_data(
  data = ADMI,
  vars = vars,
  method = method,
  fun = ancova
)
pool_obj_ancova <- pool(ana_obj_ancova)
print(pool_obj_ancova)
#> 
#> Pool Object
#> -----------
#> Number of Results Combined: 100
#> Method: rubin
#> Confidence Level: 0.95
#> Alternative: two.sided
#> 
#> Results:
#> 
#>   ========================================================
#>       parameter      est     se     lci     uci     pval  
#>   --------------------------------------------------------
#>      trt_Week 24    -2.177  0.182  -2.535  -1.819  <0.001 
#>    lsm_ref_Week 24  0.077   0.131  -0.18   0.334   0.558  
#>    lsm_alt_Week 24   -2.1   0.125  -2.347  -1.854  <0.001 
#>      trt_Week 48    -3.806  0.256  -4.309  -3.304  <0.001 
#>    lsm_ref_Week 48  0.044   0.185  -0.319  0.408   0.811  
#>    lsm_alt_Week 48  -3.762  0.176  -4.107  -3.417  <0.001 
#>   --------------------------------------------------------
tidy_pool_obj(pool_obj_ancova)
#> # A tibble: 6 × 10
#>   parameter       description visit parameter_type lsm_type     est    se    lci
#>   <chr>           <chr>       <chr> <chr>          <chr>      <dbl> <dbl>  <dbl>
#> 1 trt_Week 24     Treatment … Week… trt            <NA>     -2.18   0.182 -2.53 
#> 2 lsm_ref_Week 24 Least Squa… Week… lsm            ref       0.0766 0.131 -0.180
#> 3 lsm_alt_Week 24 Least Squa… Week… lsm            alt      -2.10   0.125 -2.35 
#> 4 trt_Week 48     Treatment … Week… trt            <NA>     -3.81   0.256 -4.31 
#> 5 lsm_ref_Week 48 Least Squa… Week… lsm            ref       0.0442 0.185 -0.319
#> 6 lsm_alt_Week 48 Least Squa… Week… lsm            alt      -3.76   0.176 -4.11 
#> # ℹ 2 more variables: uci <dbl>, pval <dbl>

7 Step 5: Responder Endpoint Analysis (CRIT1FLN)

7.1 Define Analysis Function

gcomp_responder <- function(data, ...) {
  model <- glm(CRIT1FLN ~ TRT + BASE + STRATA + REGION, data = data, family = binomial)

  marginal_fit <- get_marginal_effect(
    model,
    trt = "TRT",
    method = "Ge",
    type = "HC0",
    contrast = "diff",
    reference = "Placebo"
  )

  res <- marginal_fit$marginal_results
  list(
    trt = list(
      est = res[res$STAT == "diff", "STATVAL"][[1]],
      se = res[res$STAT == "diff_se", "STATVAL"][[1]],
      df = NA
    )
  )
}

7.2 Define Variables and Run Analysis

vars_binary <- set_vars(
  subjid = "USUBJID",
  visit = "AVISIT",
  group = "TRT",
  outcome = "CRIT1FLN",
  covariates = c("BASE", "STRATA", "REGION")
)
ana_obj_prop <- analyse_mi_data(
  data = ADMI,
  vars = vars_binary,
  method = method,
  fun = gcomp_responder
)
pool_obj_prop <- pool(ana_obj_prop)
print(pool_obj_prop)
#> 
#> Pool Object
#> -----------
#> Number of Results Combined: 100
#> Method: rubin
#> Confidence Level: 0.95
#> Alternative: two.sided
#> 
#> Results:
#> 
#>   ==================================================
#>    parameter   est     se     lci     uci     pval  
#>   --------------------------------------------------
#>       trt     -0.063  0.012  -0.087  -0.039  <0.001 
#>   --------------------------------------------------

8 Final Notes