Initial setup

rm(list=ls())
library(ggplot2)
library(effects)
library(glmmTMB)
library(performance)
rnd = function(x,digits=2){ return(format(round(x,digits),nsmall=digits)) }
tsz=25

Fully open code at: https://github.com/psicostat/workshops/tree/main/Interaction%20introduction%202024




EXAMPLE: TREATMENT INTERACTION EFFECT

Let’s simulate a simple 2B x 2W (group [control vs treated] x time [pre vs post]) interaction that is treatment-like. First, I simulate a total of 20,000 subjects just to check that everything is ok with the model. I simulate the data in such a way that the effect on the overall standardized scale is d = 0.40 and the intraclass-correlation (ICC) is 0.65 (both these data will be important for power).

set.seed(0)
N = 2e4
id = rep(1:N,times=2)
rInt = rep(rnorm(N,0,.806),times=2)
time = rep(c(0,1),each=N)
group = rep(c(0,1),each=N/2,times=2)
residual = rnorm(N*2,0,.592)
outcome = 0 + 0*time + 0*group + 0.40*time*group + rInt + residual

time = as.factor(time)
group = as.factor(group)

df = data.frame(id,time,group,outcome)

fit1 = glmmTMB(outcome ~ time * group + (1|id), data=df)
plot(allEffects(fit1), multiline=T, ylim=c(-0.05,0.45))

summary(fit1)
##  Family: gaussian  ( identity )
## Formula:          outcome ~ time * group + (1 | id)
## Data: df
## 
##      AIC      BIC   logLik deviance df.resid 
## 102623.0 102674.5 -51305.5 102611.0    39994 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 0.6586   0.8115  
##  Residual             0.3481   0.5900  
## Number of obs: 40000, groups:  id, 20000
## 
## Dispersion estimate for gaussian family (sigma^2): 0.348 
## 
## Conditional model:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.012539   0.010034    1.25    0.211    
## time1        -0.012808   0.008344   -1.53    0.125    
## group1       -0.015383   0.014190   -1.08    0.278    
## time1:group1  0.409802   0.011800   34.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::icc(fit1)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.654
##   Unadjusted ICC: 0.636

Now let’s perform an actual power analysis simulation (1,000 iterations). Let’s say that our real (resource-constrained) N = 150, to be divided into the two groups.

Because the interaction is simple (2 x 2) and it is described by one single interaction coefficient, the power can be directly computed on that interaction coefficient.

set.seed(0)

N = 150
id = rep(1:N,times=2)
time = rep(c(0,1),each=N)
group = rep(c(0,1),each=N/2,times=2)

niter = 1000
p = rep(NA,niter)

for(i in 1:niter){
    rInt = rep(rnorm(N,0,.806),times=2)
    residual = rnorm(N*2,0,.592)
    outcome = 0 + 0*time + 0*group + 0.40*time*group + rInt + residual

    df = data.frame(id,time,group,outcome)
    df$time = as.factor(df$time)
    df$group = as.factor(df$group)

    fit1 = glmmTMB(outcome ~ time * group + (1|id), data=df)
    p[i] = summary(fit1)$coefficients$cond["time1:group1","Pr(>|z|)"]
    # print(mean(p < 0.05, na.rm=T))
}
mean(p < 0.05, na.rm=T)
## [1] 0.82

So, the estimated power is 0.82


Now, what if I ignore the pre-test measures and only compare the two groups at post-test?

This is equivalent to a simple t-test or a one-way ANOVA.

Actually, I lose power. The reason is that I am no longer modelling the inter-individual variability via random intercepts. This variability continues to exist, but not being account in the model, it ends up adding to the residual Sigma, which dampens power.

set.seed(0)

N = 150
id = rep(1:N,times=2)
time = rep(c(0,1),each=N)
group = rep(c(0,1),each=N/2,times=2)

niter = 1000
p = rep(NA,niter)

for(i in 1:niter){
    rInt = rep(rnorm(N,0,.806),times=2)
    residual = rnorm(N*2,0,.592)
    outcome = 0 + 0*time + 0*group + 0.40*time*group + rInt + residual

    df = data.frame(id,time,group,outcome)
    df$time = as.factor(df$time)
    df$group = as.factor(df$group)

    fit1 = lm(outcome ~ group, data=df[df$time==1, ])
    p[i] = summary(fit1)$coefficients["group1","Pr(>|t|)"]
    
    # print(mean(p < 0.05, na.rm=T))
}
mean(p < 0.05, na.rm=T)
## [1] 0.664

So, now the estimated power is down to 0.66

IMPORTANT: this is not always true

Empirically, I’ve just tested that all this emerges (or at least, it becomes appreciable) when ICC > 0.50 (that is, when more observed variance is accounted for by inter-individual variability than by residual variability).