factor scores curiosity -- comparing OpenMx and lavaan
when estimating a very simple latent variable autoregressive panel (AR(1)) model with an integrated trend component, I noticed that the factor scores based on the regression method do not include the trend (or maybe they are a kind of detrended). I'm wondering whether there maybe is a reason or a rationale behind this that I don't see. If you add the model implied means to the factor scores, you get the same factor scores as when using lavaan.
Here is a minimal working example:
# input parameters
#-----------------
J=2000 # sample size
Tpoints=I=5 # time points und Items
P=1
beta=BE=0.75 # AR coefficient
PS=1-beta**2 # process noise
PH=1 # initial variance of f (at t1)
LD=matrix(c(0.65,0.75,0.80,0.85,0.95),Tpoints,P) # loadings
TH=diag(1-as.vector(LD)**2) # measurement error variance
trend <- 1
# generate true scores f
#-----------------------
set.seed(666666)
# f at t1
f1 <- matrix( NA, J, 1)
f1[,1] <- rnorm( n=J, mean=0, sd= sqrt(PH) )
# final f matrix
f.true <- matrix( NA , nrow=J, ncol=Tpoints )
colnames(f.true) <- paste0("T", formatC( 1:(Tpoints), format="fg", width=nchar(as.character(Tpoints)), flag=0 ) )
for(j in 1:J) {
f <- matrix( NA , nrow=Tpoints, ncol=P )
for(t in 1:(Tpoints)){
if(t %% Tpoints==1){
f[t,] <- f1[j,]
}else{
f[t,] <- (t-1)*trend + BE %*% f[t-1,] + rnorm( n= 1, mean = 0, sd = sqrt( PS ) )
}
}
f.true[j,] <- f
}
# generate responses Y
# --------------------
Y <- NULL # J x I*Tpoints dataset
for(j in 1:J) {
# observed
Y1 <- matrix( NA, nrow=Tpoints, ncol=I )
Y2 <- NULL
# var/cov matrix of epsilons, measurement error)
EPSILON <- matrix( NA, nrow=Tpoints, ncol=I )
for(t in 1:(Tpoints)){ # n=1; t=1
EPSILON[t,] <- sapply( diag(TH), function(variance) rnorm( n=1, mean=0, sd=sqrt(variance) ) )
Y1[t,] <- f.true[j,t] %*% t(LD) + EPSILON[t,]
Y2 <- cbind(Y2, matrix( Y1[t,], nrow=1, ncol=I, dimnames=list(NULL, c(paste0("Y", 1:I, "_", "T", t))) ) )
}
Y <- as.data.frame( rbind(Y, Y2 ) , drop=F)
}
# model estimation in OpenMx
#---------------------------
dataset=Y; I=Tpoints=5; P=1;PH=1; psilab=rep("PS",Tpoints-1)
# load OpenMx
library(OpenMx)
# packageVersion("OpenMx") # [1] '2.12.2'
# use NPSOL as optimizer
if ( options()$mxOption$'Default optimizer'!='NPSOL'){
mxOption(NULL, "Default optimizer", 'NPSOL')
}
modelname <- "AR1_with_linear_trend"
latVars <- c( paste0("f",1:Tpoints),paste0("lint",1:(Tpoints-1)) )
ARmodelMX <- mxModel(
name=modelname,
mxData( dataset, type="raw", means=NA, numObs = nrow(dataset) ),
manifestVars = c( colnames(dataset) ),
latentVars = latVars,
type="RAM",
# measurement model
mxPath(from="f1",to=c("Y1_T1", "Y2_T1", "Y3_T1", "Y4_T1", "Y5_T1","f2"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f2",to=c("Y1_T2", "Y2_T2", "Y3_T2", "Y4_T2", "Y5_T2","f3"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f3",to=c("Y1_T3", "Y2_T3", "Y3_T3", "Y4_T3", "Y5_T3","f4"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f4",to=c("Y1_T4", "Y2_T4", "Y3_T4", "Y4_T4", "Y5_T4","f5"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f5",to=c("Y1_T5", "Y2_T5", "Y3_T5", "Y4_T5", "Y5_T5"), free=c(T,T,T,T,T),
arrows=1, label=c("LD1","LD2","LD3","LD4","LD5") ) ,
# latent trend variables
mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("f2", "f3", "f4", "f5" ), free=FALSE, value=1 , arrows=1 ) ,
mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("lint1", "lint2", "lint3", "lint4"), free=FALSE, value=c(0) , arrows=2 ) ,
mxPath(from=c("lint1", "lint2", "lint3"), to=c("lint2", "lint3", "lint4"), free=FALSE, value=c(1) , arrows=1 ) ,
# disturbances
mxPath(from=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"),to=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"), free=T, arrows=2, label=c("TH1") ),
mxPath(from=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"),to=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"), free=T, arrows=2, label=c("TH2") ),
mxPath(from=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"),to=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"), free=T, arrows=2, label=c("TH3") ),
mxPath(from=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"),to=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"), free=T, arrows=2, label=c("TH4") ),
mxPath(from=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"),to=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"), free=T, arrows=2, label=c("TH5") ),
# (error) variances of f
mxPath(from="f1",to="f1", free=FALSE , value=PH , arrows=2, label="PH" ),
mxPath(from="f2",to="f2", free=TRUE , arrows=2, label=psilab[1] ),
mxPath(from="f3",to="f3", free=TRUE , arrows=2, label=psilab[2] ),
mxPath(from="f4",to="f4", free=TRUE , arrows=2, label=psilab[3] ),
mxPath(from="f5",to="f5", free=TRUE , arrows=2, label=psilab[4] ),
# intercepte auf 0
mxPath(from="one",to=colnames(dataset), free=FALSE, value=0, arrows=1),
mxPath(from="one",to=latVars, free=c(FALSE,FALSE,FALSE,FALSE,FALSE, TRUE,TRUE,TRUE,TRUE),
value=c(0,0,0,0,0, 0,0,0,0),
arrows=1,
label=c("noint","noint","noint","noint","noint", "lt", "lt", "lt", "lt") )
)
# fit model to the data
#-----------------------
fitMx <- mxRun( ARmodelMX)
summary(fitMx)
# Model implied means
#--------------------
# B matrix
B <- matrix(0,9,9)
B[2,1] <- B[3,2] <- B[4,3] <- B[5,4] <- fitMx@output$estimate["BE"]
B[2,6] <- B[3,7] <- B[4,8] <- B[5,9] <- 1
B[7,6] <- B[8,7] <- B[9,8] <- 1
# model implied means
meanslat <- solve( diag(1,ncol(B)) - B) %*% matrix(c(rep(0,Tpoints), rep(fitMx@output$estimate["lt"], Tpoints-1)), ncol=1)
meansf <- as.matrix( meanslat[1:Tpoints,], ncol=1)
# compute regression method based individual scores via OpenMx
#-------------------------------------------------------------
omxR <- mxFactorScores(fitMx, type='Regression')[,1:Tpoints,1]
# add means to the individual score estimates (adjusted scores)
#---------------------------------------------------------------
omxRplusmeansf <- matrix(NA, nrow(omxR), ncol(omxR))
for (t in 1:ncol(omxRplusmeansf)) {
omxRplusmeansf[,t] <- omxR[,t] + meansf[t,]
}
# generate regression based scores using lavaan for comparison
#--------------------------------------------------------------
################ model estimation ################
library(lavaan)
# Model estimation as structural equation model
Model <- '
# measurement models
fT1 =~ LD1*Y1_T1 + LD2*Y2_T1 + LD3*Y3_T1 + LD4*Y4_T1 + LD5*Y5_T1
fT2 =~ LD1*Y1_T2 + LD2*Y2_T2 + LD3*Y3_T2 + LD4*Y4_T2 + LD5*Y5_T2
fT3 =~ LD1*Y1_T3 + LD2*Y2_T3 + LD3*Y3_T3 + LD4*Y4_T3 + LD5*Y5_T3
fT4 =~ LD1*Y1_T4 + LD2*Y2_T4 + LD3*Y3_T4 + LD4*Y4_T4 + LD5*Y5_T4
fT5 =~ LD1*Y1_T5 + LD2*Y2_T5 + LD3*Y3_T5 + LD4*Y4_T5 + LD5*Y5_T5
# measurement errors
Y1_T1 ~~ TH1*Y1_T1
Y2_T1 ~~ TH2*Y2_T1
Y3_T1 ~~ TH3*Y3_T1
Y4_T1 ~~ TH4*Y4_T1
Y5_T1 ~~ TH5*Y5_T1
Y1_T2 ~~ TH1*Y1_T2
Y2_T2 ~~ TH2*Y2_T2
Y3_T2 ~~ TH3*Y3_T2
Y4_T2 ~~ TH4*Y4_T2
Y5_T2 ~~ TH5*Y5_T2
Y1_T3 ~~ TH1*Y1_T3
Y2_T3 ~~ TH2*Y2_T3
Y3_T3 ~~ TH3*Y3_T3
Y4_T3 ~~ TH4*Y4_T3
Y5_T3 ~~ TH5*Y5_T3
Y1_T4 ~~ TH1*Y1_T4
Y2_T4 ~~ TH2*Y2_T4
Y3_T4 ~~ TH3*Y3_T4
Y4_T4 ~~ TH4*Y4_T4
Y5_T4 ~~ TH5*Y5_T4
Y1_T5 ~~ TH1*Y1_T5
Y2_T5 ~~ TH2*Y2_T5
Y3_T5 ~~ TH3*Y3_T5
Y4_T5 ~~ TH4*Y4_T5
Y5_T5 ~~ TH5*Y5_T5
# autoregression, equal over time
fT2 ~ BE*fT1
fT3 ~ BE*fT2
fT4 ~ BE*fT3
fT5 ~ BE*fT4
# variance eta1 and process error variances
fT1 ~~ 1*fT1 # identification/scaling eta1=1
fT2 ~~ label("PS")* fT2
fT3 ~~ label("PS")* fT3
fT4 ~~ label("PS")* fT4
fT5 ~~ label("PS")* fT5
###### latent trend variable ########
# loadings on eta
lint1 =~ 1*fT2
lint2 =~ 1*fT3
lint3 =~ 1*fT4
lint4 =~ 1*fT5
# AR across time
lint2 ~ 1*lint1
lint3 ~ 1*lint2
lint4 ~ 1*lint3
# variances
lint1 ~~ 0*lint1
lint2 ~~ 0*lint2
lint3 ~~ 0*lint3
lint4 ~~ 0*lint4
# intercepts
lint1 ~ lt*1
lint2 ~ lt*1
lint3 ~ lt*1
lint4 ~ lt*1
'
fitlavaan <- lavaan(Model , estimator = "ML", data=Y, auto.fix.first=F )
summary( fitlavaan )
lavR <- lavPredict(fitlavaan, type="lv", method="regression", se="standard", label=TRUE)[, 1:Tpoints]
# Illustrate the problem:
# plot adjusted and unadjusted OpenMx scores, lavaan scores and true scores
#--------------------------------------------------------------------------------------
Time <- 1:5 ; NJ=30
# plot some trajectories
for(j in (1:NJ)) { #
par(xpd=TRUE)
plot(Time, f.true[j,], type="l", col="red", main=paste("Regression Method in an AR1 \n with trend for j =",j),
lwd=3, ylim=c(0,11), lty=1,
ylab="true score")
lines(Time, omxRplusmeansf[j,],col="blue", lwd=2, lty=2)
lines(Time, omxR[j,],col="darkgreen", lwd=2, lty=3)
lines(Time, lavR[j,],col="orange", lwd=2, lty=4)
legend( "top", inset=c(0,0.05),
c('true', 'oxR+m' , 'oxR', 'lR' ),
lwd=c(3,2,2,2),col=c('red', 'blue', 'darkgreen', 'orange' ),
lty=c(1,2,3,4), bg="white", horiz=T, box.lty = 0)
}
I'd be grateful for any hint.
Many thanks!
Thank you for testing!
Log in or register to post comments
In reply to Thank you for testing! by AdminRobK
Ah, OK, many thanks for
Log in or register to post comments
In reply to Ah, OK, many thanks for by kaje
Thank YOU for posting about
Log in or register to post comments