IADR 2026 Workshop - Modelling contextual effect

Author

Li Huihua

Published

March 27, 2026

library(tidyverse)    #Data prep/exploration
library(dplyr)        # data manipulation
library(lme4)         # glmer multilevel models
library(performance)  # icc() — easiest VPC for glmer binomial
library(broom.mixed)   # tidy() for model summaries


## Read data relative to this document
Pain <- readRDS("pain_workshop.rds")
str(Pain)

'data.frame':   5000 obs. of  11 variables:
 $ school_id        : Factor w/ 300 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ school_type      : Factor w/ 2 levels "private","public": 1 1 1 1 1 1 1 1 1 1 ...
 $ school_type_num  : int  1 1 1 1 1 1 1 1 1 1 ...
 $ age              : num  12.6 11.6 11.9 13.7 13.6 11.2 11.8 12.1 11 10.9 ...
 $ age_c            : num  0.555 -0.455 -0.093 1.68 1.555 ...
 $ sex              : Factor w/ 2 levels "male","female": 1 2 1 2 2 2 1 2 1 1 ...
 $ family_income    : Factor w/ 2 levels "high","low": 1 2 1 1 1 1 1 2 1 1 ...
 $ family_income_num: int  1 0 1 1 1 1 1 0 1 1 ...
 $ high_sugar       : Factor w/ 2 levels "low/normal","high": 1 1 1 1 1 1 1 2 1 1 ...
 $ high_sugar_num   : int  0 0 0 0 0 0 0 1 0 0 ...
 $ dental_pain      : int  0 1 1 1 0 0 1 0 0 0 ...

Multilevel Models

##Model 0: Two-level variance-components binomial model
M0 <- glmer(dental_pain ~ 1 + (1|school_id), data = Pain, 
            family = binomial(link="logit"))

# Model 1: Two-level Random Intercept binomial model with exposure only
M1 <- update(M0, . ~ . + high_sugar)

# Model 2: Two-level Random Intercept binomial model adjusted 
##by level 1 confounders
M2 <- update(M1, . ~ . + age_c + sex + family_income)

# Model 3: Two-level Random Intercept binomial model adjusted by 
##all confounders
M3 <- update(M2, . ~ . + school_type)

## Results of M0
summary(M0)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dental_pain ~ 1 + (1 | school_id)
   Data: Pain

      AIC       BIC    logLik -2*log(L)  df.resid 
   6301.1    6314.2   -3148.6    6297.1      4998 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.8974 -0.7440 -0.5083  0.9823  2.5854 

Random effects:
 Groups    Name        Variance Std.Dev.
 school_id (Intercept) 0.6977   0.8353  
Number of obs: 5000, groups:  school_id, 300

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.59988    0.05836  -10.28   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## probability of having pain 
plogis(fixef(M0))

(Intercept) 
  0.3543713

#Observed prevalence
mean(Pain$dental_pain)

[1] 0.374

Variance Partition Coefficient (VPC)

# manual VPC in a tidy way
var_u <- VarCorr(M0)$school_id[1,1]
vpc <- var_u / (var_u + pi^2 / 3)
var_u

[1] 0.6977398

vpc

[1] 0.174977

# VPC with performance 
icc(M0)

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.175
  Unadjusted ICC: 0.175

# Compare VPCs
models <- list(M0 = M0, M1 = M1, M2 = M2, M3 = M3)

vpc_table <- map(names(models), ~ tibble(
  Model       = .x,
  VPC_percent = round(icc(models[[.x]])$ICC_adjusted[1] * 100, 1),
  School_Var  = VarCorr(models[[.x]])$school_id[1, 1]
)) %>% list_rbind()

vpc_table %>% print()

# A tibble: 4 × 3
  Model VPC_percent School_Var
  <chr>       <dbl>      <dbl>
1 M0           17.5      0.698
2 M1           18        0.721
3 M2           17.5      0.700
4 M3           14.7      0.565

Model Performance

# Compare model performance
comparison <- data.frame(
  Model = names(models),
  AIC   = sapply(models, AIC),
  BIC   = sapply(models, BIC),
  logLik = sapply(models, function(m) as.numeric(logLik(m))),
  df = sapply(models, function(m) attr(logLik(m), "df"))
) %>%
  mutate(
    delta_AIC = AIC - min(AIC),
    delta_BIC = BIC - min(BIC)
  ) %>%
  arrange(AIC)

print(comparison)

   Model      AIC      BIC    logLik df delta_AIC delta_BIC
M3    M3 6071.760 6117.380 -3028.880  7   0.00000   0.00000
M2    M2 6117.161 6156.264 -3052.580  6  45.40111  38.88391
M1    M1 6140.098 6159.649 -3067.049  3  68.33783  42.26905
M0    M0 6301.147 6314.182 -3148.574  2 229.38732 196.80135

Fixed Effects

# Fixed effects in a nice table
tidy(M0, conf.int = TRUE, exponentiate = TRUE) %>%   # ORs with 95% CIs
  filter(effect == "fixed") %>% 
  dplyr::select(term, estimate, std.error, statistic, p.value, conf.low, conf.high) %>% 
  mutate(across(where(is.numeric), ~ round(., 3)))

# A tibble: 1 × 7
  term        estimate std.error statistic p.value conf.low conf.high
  <chr>          <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 (Intercept)    0.549     0.032     -10.3       0     0.49     0.615

tidy(M1, conf.int = TRUE, exponentiate = TRUE) %>%   
  filter(effect == "fixed") %>% 
  dplyr::select(term, estimate, std.error, statistic, p.value, conf.low, conf.high) %>% 
  mutate(across(where(is.numeric), ~ round(., 3)))

# A tibble: 2 × 7
  term           estimate std.error statistic p.value conf.low conf.high
  <chr>             <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 (Intercept)       0.407     0.026     -14.0       0    0.358     0.461
2 high_sugarhigh    2.38      0.162      12.7       0    2.08      2.72

tidy(M2, conf.int = TRUE, exponentiate = TRUE) %>%   
  filter(effect == "fixed") %>% 
  dplyr::select(term, estimate, std.error, statistic, p.value, conf.low, conf.high) %>% 
  mutate(across(where(is.numeric), ~ round(., 3)))

# A tibble: 5 × 7
  term             estimate std.error statistic p.value conf.low conf.high
  <chr>               <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 (Intercept)         0.313     0.026    -13.9    0        0.265     0.368
2 high_sugarhigh      2.30      0.158     12.1    0        2.01      2.63 
3 age_c               1.06      0.035      1.6    0.11     0.988     1.13 
4 sexfemale           1.30      0.085      4.04   0        1.14      1.48 
5 family_incomelow    1.25      0.088      3.18   0.001    1.09      1.44

tidy(M3, conf.int = TRUE, exponentiate = TRUE) %>%   
  filter(effect == "fixed") %>% 
  dplyr::select(term, estimate, std.error, statistic, p.value, conf.low, conf.high) %>% 
  mutate(across(where(is.numeric), ~ round(., 3)))

# A tibble: 6 × 7
  term              estimate std.error statistic p.value conf.low conf.high
  <chr>                <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
1 (Intercept)          0.144     0.02     -13.8    0        0.109     0.189
2 high_sugarhigh       2.28      0.156     12.0    0        1.99      2.61 
3 age_c                1.05      0.035      1.58   0.114    0.987     1.12 
4 sexfemale            1.30      0.085      4.10   0        1.15      1.48 
5 family_incomelow     1.16      0.082      2.08   0.037    1.01      1.33 
6 school_typepublic    2.78      0.405      7.00   0        2.09      3.70

Specific Contextual Effect

# MOR: for use with the multilevel logistic regression model
# var_u denote the estimate variance of the random effects.
var_u=VarCorr(M3)$school_id[1,1]
MOR <- exp(((2*var_u)^0.5)*qnorm(0.75))
MOR

[1] 2.048593

Single Level Model

M_SL <- glm(dental_pain ~ high_sugar + age_c + sex + family_income + school_type, 
            data = Pain, family = binomial(link="logit"))
tidy(M_SL, conf.int = TRUE, exponentiate = TRUE) %>%   
  dplyr::select(term, estimate, std.error) %>% 
  mutate(across(where(is.numeric), ~ round(., 3)))

# A tibble: 6 × 3
  term              estimate std.error
  <chr>                <dbl>     <dbl>
1 (Intercept)          0.176     0.091
2 high_sugarhigh       2.11      0.063
3 age_c                1.05      0.031
4 sexfemale            1.31      0.06 
5 family_incomelow     1.13      0.066
6 school_typepublic    2.49      0.088