Contents
- Chapter 1: Introduction to R
- Chapter 2: Path Models and Analysis
- Chapter 3: Basic Latent Variable Models
- Chapter 4: Latent Variable Models with Multiple Groups
- Chapter 5: Models with Multiple Time Periods
- Chapter 6: Models with Dichotomous Indicator Variables
- Chapter 7: Missing Data
- Chapter 8: Sample Size Planning
- Chapter 9: Hierarchical Latent Variable Models
Chapter 1: Introduction to R
Input data using the c() function
# create new dataset
newData <- c(4,5,3,6,9)
Input correlation matrix
# load lavaan
library(lavaan)
# input correlations (lower triangle)
example.cor <- lower2full(c(1, 0.85, 1, 0.84, 0.61, 1, 0.68, 0.59, 0.41, 1))
# name the rows and columns
rownames(example.cor) <- colnames(example.cor) <- c("Var1", "Var2", "Var3", "Var4")
Summary statistics
# load the MLBPitching2011 dataset from the BaylorEdPsych package
library(BaylorEdPsych)
data(MLBPitching2011)
# summary statistics
summary(MLBPitching2011$ERAP)
library(psych)
describe(MLBPitching2011$ERAP)
# frequency of each value of ERAP variable
table(MLBPitching2011$ERAP)
# frequency table
# make cut points for frequency table groupings--here I used 50
boundaries <- seq(0,550,50)
# frequency table
table(cut(MLBPitching2011$ERAP, boundaries))
# relative frequency table
table(cut(MLBPitching2011$ERAP, boundaries)) / length(MLBPitching2011$ERAP)
# Pearson correlations for losses (L) and Age
cor(MLBPitching2011$Age,MLBPitching2011$L)
# Spearman correlation
cor(MLBPitching2011$Age,MLBPitching2011$L, method="spearman")
# covariance for losses (L) and Age
cov(MLBPitching2011$Age,MLBPitching2011$L)
Simulated data
# simulate 1000 observations from Normal distribution with a mean of 100, and SD of 15
X.n<-rnorm(1000,mean=100,sd=15)
# simulate 1000 observations from a Poisson distribution with a mean and variance of 2
X.p<-rpois(1000,lambda=2)
# calculate mean and variance of Normal data
mean(X.n); var(X.n)
# calculate mean and variance of Poisson data
mean(X.p); var(X.p)
# plot the distributions
m1 <- seq(-4,4, .01)*15+100
m2 <- seq(0,15, 1)
plot(m1, dnorm(m1, mean=100, sd=15), type="l", col="black", lwd=3, ylab="Density", xlab="Value", main="Normal")
plot(m2, dpois(m2, lambda=2), type="b", lwd=3, xlim=c(-1,15), xlab="Value", ylab="Density", main="Poisson")
Z scores using the scale() function
# Normal variable
Z.X.n <- scale(X.n)
# Poisson variable
Z.X.p <- scale(X.p)
# calculate mean and variance of Normal Z-scores
mean(Z.X.n); var(Z.X.n)
# calculate mean and variance of Poisson Z-scores
mean(Z.X.p); var(Z.X.p)
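scale() simply centers each variable at its mean and divides by its standard deviation; a quick manual check using the Normal variable:
# manual z-scores; should match scale()'s output
Z.manual <- (X.n - mean(X.n)) / sd(X.n)
all.equal(as.vector(Z.X.n), Z.manual)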
Statistical tests
# uses BaylorEdPsych package's MLBPitching2011 data
# Z test
library(TeachingDemos)
z.test(na.omit(MLBPitching2011$WLP), mu=0.50, stdev=sqrt(.08))
# one sample t-test
t.test(MLBPitching2011$WLP, mu=.50, alternative="two.sided", conf.level=.95)
# independent samples t-test
t.test(WLP~Lg, data=MLBPitching2011, var.equal=TRUE) # NAs are dropped by default
# dependent samples t-test
t.test(MLBPitching2011$W, MLBPitching2011$L, paired=TRUE)
# homogeneity of variance tests
# F-test
var.test(WLP ~ Lg, data=MLBPitching2011)
# Bartlett's Test
bartlett.test(WLP ~ Lg, data=MLBPitching2011)
library(car)
# Levene's test
leveneTest(WLP ~ Lg, data=MLBPitching2011, center="mean")
# Brown-Forsythe Test
leveneTest(WLP ~ Lg, data=MLBPitching2011, center="median")
Chapter 2: Path Models and Analysis
Example: Path Analysis using lavaan
# create a correlation matrix
library(lavaan)
regression.cor <- lower2full(c(1.0,0.20,1,0.24,0.30,1,0.70,0.80,0.30,1))
# name the variables in the matrix
colnames(regression.cor) <- rownames(regression.cor) <- c("X1", "X2", "X3", "Y")
# model syntax
regression.model <-'
# structural model for Y
Y ~ a*X1 + b*X2 + c*X3
# label the residual variance of Y
Y ~~ z*Y
'
# fit the model
regression.fit <- sem(regression.model, sample.cov=regression.cor, sample.nobs=1000)
summary(regression.fit, rsquare=TRUE)
Example: Indirect Effects
# input data
beaujean.cov <- lower2full(c(648.07, 30.05, 8.64, 140.18, 25.57, 233.21))
colnames(beaujean.cov) <- rownames(beaujean.cov) <- c("salary", "school", "iq")
# specify the path model
beaujean.model <- '
salary ~ a*school + c*iq
iq ~ b*school # this is reversed in first printing of the book
ind := b*c
'
# estimate parameters
beaujean.fit <- sem(beaujean.model, sample.cov=beaujean.cov, sample.nobs=300)
summary(beaujean.fit)
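summary(beaujean.fit) reports the defined parameter ind with a delta-method standard error. To pull out just that row (with raw data, rather than summary statistics, se="bootstrap" could instead be requested in sem() for a bootstrap interval):
# show only the defined (indirect-effect) parameter
beaujean.est <- parameterEstimates(beaujean.fit)
beaujean.est[beaujean.est$op == ":=", ]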
Create output table
library(xtable)
xtable(parameterEstimates(regression.fit, standardized=TRUE)[,c(1:3,5:6,12)],
caption="Parameter Estimates from Path Analysis Model.", label="tab:path-analysis-estimates")
Chapter 3: Basic Latent Variable Models
Example: Single factor model of WISC-IV data
Marker variable
# import data
library(lavaan)
# convert vector of correlations into matrix
wisc4.cor <- lower2full(c(1,0.72,1,0.64,0.63,1,0.51,0.48,0.37,1,0.37,0.38,0.38,0.38,1))
# enter the SDs
wisc4.sd <- c(3.01 , 3.03 , 2.99 , 2.89 , 2.98)
# name the variables
colnames(wisc4.cor) <- rownames(wisc4.cor) <- c("Information", "Similarities",
"Word.Reasoning", "Matrix.Reasoning", "Picture.Concepts")
names(wisc4.sd) <- c("Information", "Similarities", "Word.Reasoning", "Matrix.Reasoning",
"Picture.Concepts")
# convert correlations and SDs to covariances
wisc4.cov <- cor2cov(wisc4.cor,wisc4.sd)
# specify single factor model
wisc4.model<-'
g =~ a*Information + b*Similarities + c*Word.Reasoning + d*Matrix.Reasoning +
e*Picture.Concepts
'
# fit model
wisc4.fit <- cfa(model=wisc4.model, sample.cov=wisc4.cov, sample.nobs=550, std.lv=FALSE)
# examine parameter estimates
summary(wisc4.fit,standardized=TRUE)
parameterEstimates(wisc4.fit,standardized=TRUE)
# check model
# model-implied covariances
fitted(wisc4.fit)
# transform model-implied covariances to correlations
wisc4Fit.cov <- fitted(wisc4.fit)$cov
wisc4Fit.cor <- cov2cor(wisc4Fit.cov)
# residual correlations
residuals(wisc4.fit,type="cor")
# measures of model fit
fitMeasures(wisc4.fit)
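fitMeasures() prints a long vector of indices; passing a character vector of names limits the output to those of interest:
# select specific fit indices
fitMeasures(wisc4.fit, c("chisq", "df", "pvalue", "cfi", "rmsea", "srmr"))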
# modification indices
modificationIndices(wisc4.fit)
Standardized latent variable
# method 1
wisc4.model.Std<-'
g =~ NA*Information + a*Information + b*Similarities + c*Word.Reasoning +
d*Matrix.Reasoning + e*Picture.Concepts
# constrain the LV variance to 1
g~~1*g
'
wisc4.fit.Std <- cfa(wisc4.model.Std, sample.cov=wisc4.cov, sample.nobs=550)
# method 2
wisc4.fit.Std <- cfa(wisc4.model, sample.cov=wisc4.cov, sample.nobs=550, std.lv=TRUE)
Effects coding
wisc4.model.effects<-'
g =~ NA*Information + a*Information + b*Similarities + c*Word.Reasoning +
d*Matrix.Reasoning + e*Picture.Concepts
# constrain the loadings to sum to the number of indicators (i.e., average to one)
a + b + c + d + e == 5
'
wisc4.fit.effects <- cfa(wisc4.model.effects, sample.cov=wisc4.cov, sample.nobs=550)
Example: Two-factor model of WISC-IV data
wisc4.model2<-'
V =~ a*Information + b*Similarities + c*Word.Reasoning
F =~ d*Matrix.Reasoning + e*Picture.Concepts
V~~f*F
'
wisc4.fit2 <- cfa(wisc4.model2, sample.cov=wisc4.cov, sample.nobs=550)
Structure coefficients
wisc4.fit2Alt <- cfa(wisc4.model2, sample.cov=wisc4.cor, sample.nobs=550, std.lv=TRUE)
wisc4.est2 <- inspect(wisc4.fit2Alt, what="coef")
wisc4.structure2 <- wisc4.est2$lambda %*% wisc4.est2$psi
Example: Structural equation model
wisc4SEM.model <- '
# define latent variables
V =~ a*Information + b*Similarities + c*Word.Reasoning
F =~ d*Matrix.Reasoning + e*Picture.Concepts
# define structural relations
V~k*F
'
wisc4SEM.fit <- cfa(wisc4SEM.model, sample.cov=wisc4.cov, sample.nobs=550)
Chapter 4: Latent Variable Models with Multiple Groups
Example: Multiple-group model examining invariance
Input data
# input data
# variable names
wisc3.names <- c("Info", "Sim", "Vocab","Comp", "PicComp", "PicArr", "BlkDsgn", "ObjAsmb")
# group with manic symptoms
## covariances
manic.cov <- c(9.364,
7.777, 12.461,
6.422, 8.756, 10.112,
5.669, 7.445, 6.797, 8.123,
3.048, 4.922, 4.513, 4.116, 6.200,
3.505, 4.880, 4.899, 5.178, 5.114, 15.603,
3.690, 5.440, 5.220, 3.151, 3.587, 6.219, 11.223,
3.640, 4.641, 4.877, 3.568, 3.819, 5.811, 6.501, 9.797)
manic.cov <- lower2full(manic.cov)
## means
manic.means <- c(10.09, 12.07, 10.25, 9.96, 10.90, 11.24, 10.30, 10.44)
## label the covariances and means
colnames(manic.cov) <- rownames(manic.cov) <- wisc3.names
names(manic.means) <- wisc3.names
# norming group
## covariances
norming.cov <- c(9.610,
5.844, 8.410,
6.324, 6.264, 9.000,
4.405, 4.457, 5.046, 8.410,
4.464, 4.547, 4.512, 3.712, 10.240,
3.478, 2.967, 2.970, 2.871, 3.802, 10.890,
5.270, 4.930, 4.080, 3.254, 5.222, 3.590, 11.560,
4.297, 4.594, 4.356, 3.158, 4.963, 3.594, 6.620, 10.890)
norming.cov <- lower2full(norming.cov)
## means
norming.means <- c(10.10, 10.30, 9.80, 10.10, 10.10, 10.10, 9.90, 10.20)
## label the covariances and means
colnames(norming.cov) <- rownames(norming.cov) <- wisc3.names
names(norming.means) <- wisc3.names
# combine the covariance matrices, sample sizes, and means into single list objects
combined.cov <- list(manic=manic.cov, norming=norming.cov)
combined.n <- list(manic=81, norming=200)
combined.means <- list(manic=manic.means, norming=norming.means)
Fit the model
# specify fit indices of interest
fit.indices <- c("chisq", "df", "cfi", "rmsea", "srmr", "mfi")
# specify model
wisc3.model <-'
VC =~ Info + Sim + Vocab + Comp
VS =~ PicComp + PicArr + BlkDsgn + ObjAsmb
VC ~~ VS
'
# fit separate models for both groups
# group with manic symptoms
manic.fit <- cfa(wisc3.model, sample.cov=manic.cov, sample.nobs=81, sample.mean=manic.means, meanstructure=TRUE)
fitMeasures(manic.fit, fit.indices)
# norming group
norming.fit <- cfa(wisc3.model, sample.cov=norming.cov, sample.nobs=200, sample.mean=norming.means, meanstructure=TRUE)
fitMeasures(norming.fit, fit.indices)
# configural invariance
configural.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means)
fitMeasures(configural.fit, fit.indices)
# weak invariance
weak.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings"))
fitMeasures(weak.fit, fit.indices)
# strong invariance
strong.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts"))
fitMeasures(strong.fit, fit.indices)
# show modification indices for strong invariance model in descending order
strong.mi <- modindices(strong.fit)
strong.mi <- strong.mi[strong.mi$op=="~1",]
strong.mi[order(strong.mi$mi, decreasing=TRUE),]
# strong invariance 2: intercept for the Similarities subtest is freely estimated
strong.fit2 <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts"), group.partial=c("Sim~1"))
fitMeasures(strong.fit2, fit.indices)
# partial strict invariance
strict.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals"), group.partial=c("Sim~1", "Sim~~Sim"))
fitMeasures(strict.fit, fit.indices)
# partial strict invariance 2: residual variances for Picture Completion, Comprehension, and Picture Arrangement subtests are freely estimated
strict.fit2 <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr"))
fitMeasures(strict.fit2, fit.indices)
# latent variances
factor.var.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr"))
fitMeasures(factor.var.fit, fit.indices)
# latent covariances
factor.covar.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr"))
fitMeasures(factor.covar.fit, fit.indices)
# latent means
factor.means.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr"))
fitMeasures(factor.means.fit, fit.indices)
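The invariance models form a nested sequence, so they can also be compared directly with chi-square difference tests; for example, using the partial-invariance versions fit above:
# chi-square difference tests between nested invariance models
anova(configural.fit, weak.fit, strong.fit2, strict.fit2)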
Example: Behavior genetic analysis
# import data
## MZ twins
MZ <- lower2full(c(.725,.589,.792))
rownames(MZ) <- colnames(MZ) <- c("P1", "P2")
## DZ twins
DZ <- lower2full(c(.779,.246,.837))
rownames(DZ) <- colnames(DZ) <- c("P1", "P2")
# combine the covariances and sample sizes
bmi.cov <- list(MZ=MZ,DZ=DZ)
bmi.n <- list(MZ=534,DZ=328)
# specify ADE model
bmi.ade.model<-'
# build the factor model with group constraints
A1 =~ NA*P1 + c(a,a)*P1
A2 =~ NA*P2 + c(a,a)*P2
D1 =~ NA*P1 + c(d,d)*P1
D2 =~ NA*P2 + c(d,d)*P2
# constrain the factor variances
A1 ~~ 1*A1
A2 ~~ 1*A2
D1 ~~ 1*D1
D2 ~~ 1*D2
P1~~c(e2,e2)*P1
P2~~c(e2,e2)*P2
# constrain the factor covariances
A1 ~~ c(1,.5)*A2
A1 ~~ 0*D1 + 0*D2
A2 ~~ 0*D1 + 0*D2
D1 ~~ c(1,.25)*D2
'
# fit model
bmi.ade.fit <- cfa(bmi.ade.model, sample.cov=bmi.cov, sample.nobs=bmi.n)
summary(bmi.ade.fit, standardized=TRUE)
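Because the loadings and residual variances carry labels (a, d, e2), proportions of phenotypic variance can be added as defined parameters, e.g., broad-sense heritability from the A and D components. A minimal sketch that refits the model with the extra definitions:
# append defined parameters to the ADE model
bmi.ade.model2 <- paste(bmi.ade.model, '
# proportion of variance from additive + dominance genetic effects
h2 := (a^2 + d^2) / (a^2 + d^2 + e2)
# proportion of variance from the environment
e.prop := e2 / (a^2 + d^2 + e2)
')
bmi.ade.fit2 <- cfa(bmi.ade.model2, sample.cov=bmi.cov, sample.nobs=bmi.n)
summary(bmi.ade.fit2)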
Chapter 5: Models with Multiple Time Periods
Example: Latent curve model
Import data
Download the LCM.zip file from the UCLA web page. The nypa95_listwise.sav file is in the SPSS folder of the chapter02 folder.
library(foreign)
crime.data <- read.spss("nypa95_listwise.sav", to.data.frame=TRUE)
crime.data <- na.omit(crime.data)
# make state x centered poverty interaction variable
crime.data$STATEPOV <- crime.data$PA * crime.data$CPV12590
# dataset 1
time <- c("JANFEB95", "MARAPR95", "MAYJUN95","JLYAUG95")
crime.cov <- cov(crime.data[time])
crime.mean <- colMeans(crime.data[time])
names(crime.mean) <- colnames(crime.cov) <- rownames(crime.cov) <- c("Time1", "Time2", "Time3", "Time4")
# dataset 2
crime.vars <- c("JANFEB95", "MARAPR95", "MAYJUN95","JLYAUG95", "PA", "CPV12590", "STATEPOV")
crime2.cov <- cov(crime.data[crime.vars])
crime2.mean <- colMeans(crime.data[crime.vars])
names(crime2.mean) <- colnames(crime2.cov) <- rownames(crime2.cov) <- c("Time1", "Time2", "Time3", "Time4", "State", "Poverty", "StPov")
Basic latent curve model specification
library(lavaan)
# mean latent intercept and constrained residual variances
crime.model1 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
i~~0*i
# residual variances
Time1~~r*Time1
Time2~~r*Time2
Time3~~r*Time3
Time4~~r*Time4
'
crime.fit1 <- growth(crime.model1, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952)
# mean latent intercept that is allowed to vary, and constrained residual variances
crime.model2 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# residual variances
Time1~~r*Time1
Time2~~r*Time2
Time3~~r*Time3
Time4~~r*Time4
'
crime.fit2 <- growth(crime.model2, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952)
# latent intercept that is allowed to vary, latent slope with variance but with its mean fixed at zero, and constrained residual variances
crime.model3 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
s~0*1
s~~0*i
# residual variances
Time1~~r*Time1
Time2~~r*Time2
Time3~~r*Time3
Time4~~r*Time4
'
crime.fit3 <- growth(crime.model3, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952)
# mean latent intercept that is allowed to vary, mean latent slope that is allowed to vary, and constrained residual variances
crime.model4 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
# residual variances
Time1~~r*Time1
Time2~~r*Time2
Time3~~r*Time3
Time4~~r*Time4
'
crime.fit4 <- growth(crime.model4, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952)
# unconstrained model
crime.model5 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
'
crime.fit5 <- growth(crime.model5, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952)
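Models 1 through 5 form a nested sequence, so chi-square difference tests can compare them directly:
# compare the five latent curve models
anova(crime.fit1, crime.fit2, crime.fit3, crime.fit4, crime.fit5)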
Latent curve models with covariates
# State is a predictor variable
crime.model6 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
# regression
i + s ~ State
'
crime.fit6 <- growth(crime.model6, sample.cov=crime2.cov, sample.mean=crime2.mean, sample.nobs=952)
# State and poverty are predictors
crime.model7 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
# regression
s + i ~ State + Poverty
'
crime.fit7 <- growth(crime.model7, sample.cov=crime2.cov, sample.mean=crime2.mean, sample.nobs=952)
# State, poverty, and their interaction are predictors
crime.model8 <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
# regression
s + i ~ State + Poverty + StPov
'
crime.fit8 <- growth(crime.model8, sample.cov=crime2.cov, sample.mean=crime2.mean, sample.nobs=952)
Alternative specification of latent intercept and slope
# intercept coded to be the last data collection period
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ -3*Time1 + -2*Time2 + -1*Time3 + 0*Time4
'
# intercept coded to be the third data collection period
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ -2*Time1 + -1*Time2 + 0*Time3 + 1*Time4
'
# intercept coded to be the middle of data collection
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ -1.5*Time1 + -0.5*Time2 + 0.5*Time3 + 1.5*Time4
'
# using two units as the time between data collection periods
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 2*Time2 + 4*Time3 + 6*Time4
'
# model with a follow-up time period distant (four units) from the end of regular data collection
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope
s =~ 0*Time1 + 1*Time2 + 2*Time3 + 6*Time4
'
# quadratic growth--second latent slope's loadings are the square of the first latent slope's loadings
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4
# slope 1
s1 =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4
# slope 2
s2 =~ 0*Time1 + 1*Time2 + 4*Time3 + 9*Time4
'
# piecewise linear slopes
model <- '
# intercept
i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 + 1*Time5 + 1*Time6
# slope 1
s1 =~ -3*Time1 + -2*Time2 + -1*Time3 + 0*Time4 + 0*Time5 + 0*Time6
# slope 2
s2 =~ 0*Time1 + 0*Time2 + 0*Time3 + 0*Time4 + 1*Time5 + 2*Time6
'
Chapter 6: Models with Dichotomous Indicator Variables
Example: dichotomous indicator variables
Import data
library(psych)
data(lsat6)
# tetrachoric correlations
tetrachoric(lsat6)
Item response theory models
library(ltm)
# logistic distribution
# discrimination and difficulty parameterization
lsat.IRT <- ltm(lsat6~z1, IRT.param=TRUE)
coef(lsat.IRT)
# slope and intercept parameterization
lsat.SI <- ltm(lsat6~z1, IRT.param=FALSE)
coef(lsat.SI)
# item characteristic curves
plot(lsat.IRT)
# normal distribution
lsat.NO <- irt.fa(lsat6, plot=FALSE)
# IRT parameter estimates
lsat.NO$irt
# loadings
lsat.NO$fa
# thresholds
lsat.NO$tau
Item factor analysis models
library(lavaan)
twoP.model<-'
# loadings
Theta =~ l1*Q1 + l2*Q2 + l3*Q3 + l4*Q4 + l5*Q5
# thresholds
Q1 | th1*t1
Q2 | th2*t1
Q3 | th3*t1
Q4 | th4*t1
Q5 | th5*t1
# convert loadings to slopes (normal)
alpha1.N := (l1)/sqrt(1-l1^2)
alpha2.N := (l2)/sqrt(1-l2^2)
alpha3.N := (l3)/sqrt(1-l3^2)
alpha4.N := (l4)/sqrt(1-l4^2)
alpha5.N := (l5)/sqrt(1-l5^2)
# convert thresholds to intercepts (normal)
beta1.N := (-th1)/sqrt(1-l1^2)
beta2.N := (-th2)/sqrt(1-l2^2)
beta3.N := (-th3)/sqrt(1-l3^2)
beta4.N := (-th4)/sqrt(1-l4^2)
beta5.N := (-th5)/sqrt(1-l5^2)
# convert intercepts to locations (normal)
loc1 := -beta1.N/alpha1.N
loc2 := -beta2.N/alpha2.N
loc3 := -beta3.N/alpha3.N
loc4 := -beta4.N/alpha4.N
loc5 := -beta5.N/alpha5.N
# convert loadings to slopes (logistic)
alpha1.L := (l1)/sqrt(1-l1^2)*1.7
alpha2.L := (l2)/sqrt(1-l2^2)*1.7
alpha3.L := (l3)/sqrt(1-l3^2)*1.7
alpha4.L := (l4)/sqrt(1-l4^2)*1.7
alpha5.L := (l5)/sqrt(1-l5^2)*1.7
# convert thresholds to locations (logistic)
loc1.L := th1/l1
loc2.L := th2/l2
loc3.L := th3/l3
loc4.L := th4/l4
loc5.L := th5/l5
# convert locations to intercepts (logistic)
beta1.L := (-alpha1.L)*loc1.L
beta2.L := (-alpha2.L)*loc2.L
beta3.L := (-alpha3.L)*loc3.L
beta4.L := (-alpha4.L)*loc4.L
beta5.L := (-alpha5.L)*loc5.L
'
twoP.fit <- cfa(twoP.model, data=data.frame(lsat6), std.lv=TRUE, ordered=c("Q1","Q2","Q3","Q4", "Q5"))
summary(twoP.fit, standardized=TRUE)
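The converted slopes, intercepts, and locations appear in the output as defined parameters; collecting just those rows makes it easy to compare, say, the logistic conversions with coef(lsat.IRT) from ltm:
# collect the defined (converted) IRT parameters
twoP.est <- parameterEstimates(twoP.fit)
twoP.est[twoP.est$op == ":=", c("label", "est", "se")]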
Chapter 7: Missing Data
The mcar.dat file used in this chapter can be downloaded from the book's web site.
Exploring missing values
# load mcar data
mcar.data <- read.table("mcar.dat", sep=",", header=TRUE)
# missing data patterns
library(mice)
md.pattern(mcar.data)
# Little's MCAR test of mean equality
library(BaylorEdPsych)
mcar.little <- LittleMCAR(mcar.data)
mcar.little[c("chi.square", "df", "p.value")]
# Examine equality of means and covariances
library(MissMech)
TestMCARNormality(mcar.data)
Handling missing values
# model specification
complete.model <- '
read =~ a*z1 + b*z2 + c*z3
read ~ g*x1
z1~~d*z1
z2~~e*z2
z3~~f*z3
read~~h*read
'
# listwise deletion
mcarListwise.fit <- sem(complete.model, data=mcar.data)
summary(mcarListwise.fit, rsquare=TRUE, standardized=TRUE)
# pairwise deletion
library(psych)
pairwiseMCAR.cov <- cov(mcar.data, use="pairwise.complete.obs")
pairwiseMCAR.mean <- colMeans(mcar.data, na.rm=TRUE)
mcarPairwise.n <- min(count.pairwise(mcar.data))
mcarPairwise.fit <- sem(complete.model, sample.cov=pairwiseMCAR.cov, sample.nobs=mcarPairwise.n, sample.mean=pairwiseMCAR.mean)
summary(mcarPairwise.fit, rsquare=TRUE, standardized=TRUE)
# mean imputation
library(Hmisc)
mcarMeanI.data <- mcar.data
mcarMeanI.data$z1 <- impute(mcarMeanI.data$z1, fun=mean)
mcarMeanI.data$z2 <- impute(mcarMeanI.data$z2, fun=mean)
mcarMeanI.data$z3 <- impute(mcarMeanI.data$z3, fun=mean)
mcarMeanImputation.fit<-sem(complete.model, data=mcarMeanI.data)
summary(mcarMeanImputation.fit, rsquare=TRUE)
# FIML
mcarFIML.fit <- sem(complete.model, data=mcar.data, missing="fiml")
summary(mcarFIML.fit, rsquare=TRUE, standardized=TRUE)
# multiple imputation using mice
library(semTools)
mcarMI.fit <- runMI(complete.model, data=mcar.data, m=20, miPackage="mice", fun="sem", seed=56587)
# multiple imputation using Amelia
library(Amelia)
mcar.sim <- amelia(mcar.data, m=20)
mcarMI.fit <- runMI(complete.model, data=mcar.sim$imputations, fun="sem")
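Depending on the semTools version, pooled estimates from the imputed data sets can then be inspected in the usual way:
# pooled parameter estimates across the imputations
summary(mcarMI.fit)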
Auxiliary variable
library(semTools)
# FIML with auxiliary variable
mcarFIMLAux2.fit <- auxiliary(complete.model, aux="x2", data=mcar.data, fun="sem")
summary(mcarFIMLAux2.fit, standardized=TRUE)
Chapter 8: Sample Size Planning
Warning: depending on your computer, the following simulations could take multiple hours to finish.
Example: Mean difference between two groups
# traditional power analysis
library(pwr)
# store the result so the sample size can be extracted
t.sampSize <- pwr.t.test(d = 0.49, sig.level = 0.10, power = 0.80)
t.sampSize
# Monte Carlo power analysis
# convert effect size to correlation
library(compute.es)
des(d=0.49, n.1=100,n.2=100)
# specify data generation model
act.model <- '
# regression
act ~ 0.24*class
# error variance
act ~~ 0.9424*act
'
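The two population values in act.model come from the effect size conversion reported by des(): with equal group sizes, r = d/sqrt(d^2 + 4), and a standardized outcome then has residual variance 1 - r^2. A quick check:
# d-to-r conversion for equal group sizes
r <- 0.49 / sqrt(0.49^2 + 4)
round(r, 2)   # 0.24
# implied residual variance of the standardized outcome
1 - 0.24^2    # 0.9424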
# specify analysis model
act2.model <- '
act ~ class
'
# simulate and analyze data
library(simsem)
act.power <- sim(nRep=1000, generate=act.model, model=act2.model, n =104, lavaanfun = "sem", multicore=TRUE, seed=0)
summaryParam(act.power,alpha = 0.10,detail=TRUE)
# search sample sizes
act.n <- sim(model=act2.model, n =rep(seq(50,120,10), 500), generate=act.model, lavaanfun = "sem", multicore=TRUE)
# power curve with line added at y= 0.80
plotPower(act.n, alpha=0.1, powerParam="act~class")
abline(h=.8, lwd=2, lty=2)
# find sample size needed for power = 0.8
act.pwr.n <- getPower(act.n, alpha = 0.10)
findPower(act.pwr.n, iv="N", power=0.8)
Example: Latent curve model
# specify data generation model
lcm.pop.model <- '
# latent variable model
i =~ 1*y1 + 1*y2 + 1*y3 + 1*y4
s =~ 0*y1 + 1*y2 + 2*y3 + 3*y4
# latent variable means
i ~ 0.00*1
s ~ 0.20*1
# regressions, with parameter of interest labeled
i ~ 0.50*x
s ~ a*x + 0.20*x
# mean and variance of x
x ~ 0.50*1
x ~~ 0.25*x
# manifest (residual) variances
y1 ~~ 0.5*y1
y2 ~~ 0.5*y2
y3 ~~ 0.5*y3
y4 ~~ 0.5*y4
# latent variable residual variances/covariances
i~~0.25*i
s~~0.09*s
i~~0.0*s
'
# specify analysis model
lcm.model <- '
# latent variable model
i =~ 1*y1 + 1*y2 + 1*y3 + 1*y4
s =~ 0*y1 + 1*y2 + 2*y3 + 3*y4
# regressions
i ~ x
s ~ a*x
'
# show model's fixed and default values
lcm.pop.fit <- growth(lcm.pop.model, do.fit=FALSE, fixed.x=FALSE)
summary(lcm.pop.fit, std=TRUE, rsquare=TRUE)
fitted(lcm.pop.fit)
# run simulations for multiple sample sizes
lcm.n <- sim(nRep=NULL, model=lcm.model, n =rep(seq(100,200,10), 500), generate=lcm.pop.model,
lavaanfun = "growth", multicore=TRUE)
# find the sample size needed for power = 0.8
lcm.pwr.n <- getPower(lcm.n)
findPower(lcm.pwr.n, "N", 0.8)
# power analysis with n=149
lcm1.power <- sim(nRep=10000, model=lcm.model, n =149, generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE, seed=998877)
summaryParam(lcm1.power,alpha = 0.05,detail=TRUE)
Example: Latent curve model with attrition
# specify missing data
lcm.pcnt.missing <- '
y1 ~ p(0.12)
y2 ~ p(0.18) + 1*x
y3 ~ p(0.27) + 1*x
y4 ~ p(0.50) + 1*x
'
missing.model <- miss(logit=lcm.pcnt.missing)
# plot missing data
plotLogitMiss(lcm.pcnt.missing, x1lim=c(0,1))
# run simulations for multiple sample sizes
missing.model <- miss(logit=lcm.pcnt.missing, m=10, package="mice")
lcm.missing.n <- sim(nRep=NULL, model=lcm.model, n=rep(seq(200,300,10), 100),
generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE, miss=missing.model)
# find the sample size needed for power = 0.8
lcm.missing.pwr.n <- getPower(lcm.missing.n)
findPower(lcm.missing.pwr.n, "N", 0.8)
# power analysis with n=273
lcm1.missing <- sim(nRep=10000, model=lcm.model, n =273, generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE, miss=missing.model, seed=4335)
summaryParam(lcm1.missing,alpha = 0.05,detail=TRUE)
Chapter 9: Hierarchical Latent Variable Models
Example: WISC-IV extended data
Import data
wisc4.cov <- lower2full(c(8.29,
5.37, 9.06,
2.83, 4.44, 8.35,
2.83, 3.32, 3.36, 8.88,
5.50, 6.66, 4.20, 3.43, 9.18,
6.18, 6.73, 4.01, 3.33, 6.77, 9.12,
3.52, 3.77, 3.19, 2.75, 3.88, 4.05, 8.88,
3.79, 4.50, 3.72, 3.39, 4.53, 4.70, 4.54, 8.94,
2.30, 2.67, 2.40, 2.38, 2.06, 2.59, 2.65, 2.83, 8.76,
3.06, 4.04, 3.70, 2.79, 3.59, 3.67, 3.44, 4.20, 4.53, 9.73))
wisc4.sd <- c(2.88,3.01,2.89,2.98,3.03,3.02,2.98,2.99,2.96,3.12)
names(wisc4.sd) <- colnames(wisc4.cov) <- rownames(wisc4.cov) <-c("Comprehension", "Information", "Matrix.Reasoning", "Picture.Concepts", "Similarities", "Vocabulary", "Digit.Span", "Letter.Number", "Coding", "Symbol.Search")
wisc4.fourFactor.model <-'
gc =~ Comprehension + Information + Similarities + Vocabulary
gf =~ Matrix.Reasoning + Picture.Concepts
gsm =~ Digit.Span + Letter.Number
gs =~ Coding + Symbol.Search
'
wisc4.fourFactor.fit<-cfa(model=wisc4.fourFactor.model, sample.cov=wisc4.cov, sample.nobs=550)
summary(wisc4.fourFactor.fit, fit.measure=TRUE, standardized=TRUE)
Higher-order model
wisc4.higherOrder.model<-'
gc =~ Comprehension + Information + Similarities + Vocabulary
gf =~ Matrix.Reasoning + Picture.Concepts
gsm =~ Digit.Span + Letter.Number
gs =~ Coding + Symbol.Search
g=~ NA*gf + gc + gsm + gs
g~~ 1*g
'
wisc4.higherOrder.fit <- cfa(model=wisc4.higherOrder.model, sample.cov=wisc4.cov, sample.nobs=550)
Bi-factor model
wisc4.bifactor.model<-'
gc =~ Comprehension + Information + Similarities + Vocabulary
gf =~ a*Matrix.Reasoning + a*Picture.Concepts
gsm =~ b*Digit.Span + b*Letter.Number
gs =~ c*Coding + c*Symbol.Search
g =~ Information + Comprehension + Matrix.Reasoning + Picture.Concepts + Similarities +
Vocabulary + Digit.Span + Letter.Number + Coding + Symbol.Search
'
wisc4.bifactor.fit<-cfa(model=wisc4.bifactor.model, sample.cov=wisc4.cov, sample.nobs=550, std.lv=TRUE, orthogonal=TRUE)
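To compare the four-factor, higher-order, and bi-factor specifications on global fit, the same indices can be collected from each fitted model; a small sketch:
# compare global fit of the three hierarchical specifications
fit.stats <- c("chisq", "df", "cfi", "rmsea", "srmr")
round(cbind(fourFactor=fitMeasures(wisc4.fourFactor.fit, fit.stats),
higherOrder=fitMeasures(wisc4.higherOrder.fit, fit.stats),
bifactor=fitMeasures(wisc4.bifactor.fit, fit.stats)), 3)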