# factor scores curiosity -- comparing OpenMx and lavaan

4 posts / 0 new
Offline
Joined: 06/05/2019 - 05:34
factor scores curiosity -- comparing OpenMx and lavaan

Hi,
when estimating a very simple latent variable autoregressive panel (AR(1)) model with an integrated trend component, I noticed that the factor scores based on the regression method do not include the trend (or maybe they are a kind of detrended). I'm wondering whether there maybe is a reason or a rationale behind this that I don't see. If you add the model implied means to the factor scores, you get the same factor scores as when using lavaan.

Here is a minimal working example:

# input parameters
#-----------------
J=2000 # sample size
Tpoints=I=5 # time points und Items
P=1
beta=BE=0.75 # AR coefficient
PS=1-beta**2 # process noise
PH=1 # initial variance of f (at t1)
LD=matrix(c(0.65,0.75,0.80,0.85,0.95),Tpoints,P) # loadings
TH=diag(1-as.vector(LD)**2) # measurement error variance
trend <- 1

# generate true scores f
#-----------------------
set.seed(666666)

# f at t1
f1 <- matrix( NA, J, 1)
f1[,1] <- rnorm( n=J, mean=0, sd= sqrt(PH) )

# final f matrix
f.true <- matrix( NA , nrow=J, ncol=Tpoints )
colnames(f.true) <-  paste0("T", formatC( 1:(Tpoints), format="fg", width=nchar(as.character(Tpoints)), flag=0 ) )

for(j in 1:J) {
f <- matrix( NA , nrow=Tpoints, ncol=P )
for(t in 1:(Tpoints)){
if(t %% Tpoints==1){
f[t,] <- f1[j,]
}else{
f[t,] <- (t-1)*trend + BE %*% f[t-1,]  + rnorm( n= 1, mean = 0, sd = sqrt( PS ) )
}
}
f.true[j,] <-  f
}

# generate responses Y
# --------------------
Y <- NULL  # J x I*Tpoints dataset

for(j in 1:J) {
# observed
Y1 <- matrix( NA, nrow=Tpoints, ncol=I )
Y2 <- NULL

# var/cov matrix of epsilons, measurement error)
EPSILON <- matrix( NA, nrow=Tpoints, ncol=I )

for(t in 1:(Tpoints)){ # n=1; t=1
EPSILON[t,] <- sapply( diag(TH), function(variance) rnorm( n=1, mean=0, sd=sqrt(variance) ) )
Y1[t,] <- f.true[j,t] %*% t(LD) + EPSILON[t,]
Y2 <- cbind(Y2, matrix( Y1[t,], nrow=1, ncol=I, dimnames=list(NULL, c(paste0("Y", 1:I, "_", "T", t)))  ) )
}
Y <- as.data.frame( rbind(Y, Y2 ) , drop=F)
}

# model estimation in OpenMx
#---------------------------
dataset=Y; I=Tpoints=5; P=1;PH=1; psilab=rep("PS",Tpoints-1)

# load OpenMx
library(OpenMx)
# packageVersion("OpenMx") # [1] '2.12.2'

# use NPSOL as optimizer
if ( options()$mxOption$'Default optimizer'!='NPSOL'){
mxOption(NULL, "Default optimizer", 'NPSOL')
}

modelname <- "AR1_with_linear_trend"
latVars <- c( paste0("f",1:Tpoints),paste0("lint",1:(Tpoints-1))  )

ARmodelMX <-   mxModel(
name=modelname,
mxData( dataset, type="raw", means=NA, numObs = nrow(dataset) ),
manifestVars = c( colnames(dataset) ),
latentVars = latVars,
type="RAM",
# measurement model
mxPath(from="f1",to=c("Y1_T1", "Y2_T1", "Y3_T1", "Y4_T1", "Y5_T1","f2"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f2",to=c("Y1_T2", "Y2_T2", "Y3_T2", "Y4_T2", "Y5_T2","f3"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f3",to=c("Y1_T3", "Y2_T3", "Y3_T3", "Y4_T3", "Y5_T3","f4"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f4",to=c("Y1_T4", "Y2_T4", "Y3_T4", "Y4_T4", "Y5_T4","f5"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),
mxPath(from="f5",to=c("Y1_T5", "Y2_T5", "Y3_T5", "Y4_T5", "Y5_T5"),      free=c(T,T,T,T,T),
arrows=1, label=c("LD1","LD2","LD3","LD4","LD5") ) ,
# latent trend variables
mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("f2", "f3", "f4", "f5" ), free=FALSE, value=1 , arrows=1 ) ,
mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("lint1", "lint2", "lint3", "lint4"), free=FALSE, value=c(0) , arrows=2 ) ,
mxPath(from=c("lint1", "lint2", "lint3"),         to=c("lint2", "lint3", "lint4"),          free=FALSE, value=c(1) , arrows=1 ) ,
# disturbances
mxPath(from=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"),to=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"), free=T, arrows=2, label=c("TH1") ),
mxPath(from=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"),to=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"), free=T, arrows=2, label=c("TH2") ),
mxPath(from=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"),to=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"), free=T, arrows=2, label=c("TH3") ),
mxPath(from=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"),to=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"), free=T, arrows=2, label=c("TH4") ),
mxPath(from=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"),to=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"), free=T, arrows=2, label=c("TH5") ),

# (error) variances of f
mxPath(from="f1",to="f1", free=FALSE , value=PH , arrows=2, label="PH" ),
mxPath(from="f2",to="f2", free=TRUE ,  arrows=2, label=psilab[1] ),
mxPath(from="f3",to="f3", free=TRUE ,  arrows=2, label=psilab[2] ),
mxPath(from="f4",to="f4", free=TRUE ,  arrows=2, label=psilab[3] ),
mxPath(from="f5",to="f5", free=TRUE ,  arrows=2, label=psilab[4] ),
# intercepte auf 0
mxPath(from="one",to=colnames(dataset), free=FALSE, value=0, arrows=1),
mxPath(from="one",to=latVars, free=c(FALSE,FALSE,FALSE,FALSE,FALSE, TRUE,TRUE,TRUE,TRUE),
value=c(0,0,0,0,0,  0,0,0,0),
arrows=1,
label=c("noint","noint","noint","noint","noint",  "lt", "lt", "lt", "lt")  )
)

# fit model to the data
#-----------------------
fitMx   <- mxRun( ARmodelMX)
summary(fitMx)

# Model implied means
#--------------------
#  B matrix
B <- matrix(0,9,9)
B[2,1] <- B[3,2] <- B[4,3] <- B[5,4] <- fitMx@output$estimate["BE"] B[2,6] <- B[3,7] <- B[4,8] <- B[5,9] <- 1 B[7,6] <- B[8,7] <- B[9,8] <- 1 # model implied means meanslat <- solve( diag(1,ncol(B)) - B) %*% matrix(c(rep(0,Tpoints), rep(fitMx@output$estimate["lt"], Tpoints-1)), ncol=1)
meansf <- as.matrix( meanslat[1:Tpoints,], ncol=1)

# compute regression method based individual scores via OpenMx
#-------------------------------------------------------------
omxR <- mxFactorScores(fitMx, type='Regression')[,1:Tpoints,1]

# add means to the individual score estimates (adjusted scores)
#---------------------------------------------------------------
omxRplusmeansf <- matrix(NA, nrow(omxR), ncol(omxR))

for (t in 1:ncol(omxRplusmeansf)) {
omxRplusmeansf[,t] <- omxR[,t] + meansf[t,]
}

# generate regression based scores using lavaan for comparison
#--------------------------------------------------------------

################ model estimation ################

library(lavaan)

# Model estimation as structural equation model
Model <- '
# measurement models
fT1 =~ LD1*Y1_T1 + LD2*Y2_T1 + LD3*Y3_T1 + LD4*Y4_T1 + LD5*Y5_T1
fT2 =~ LD1*Y1_T2 + LD2*Y2_T2 + LD3*Y3_T2 + LD4*Y4_T2 + LD5*Y5_T2
fT3 =~ LD1*Y1_T3 + LD2*Y2_T3 + LD3*Y3_T3 + LD4*Y4_T3 + LD5*Y5_T3
fT4 =~ LD1*Y1_T4 + LD2*Y2_T4 + LD3*Y3_T4 + LD4*Y4_T4 + LD5*Y5_T4
fT5 =~ LD1*Y1_T5 + LD2*Y2_T5 + LD3*Y3_T5 + LD4*Y4_T5 + LD5*Y5_T5

# measurement errors
Y1_T1 ~~ TH1*Y1_T1
Y2_T1 ~~ TH2*Y2_T1
Y3_T1 ~~ TH3*Y3_T1
Y4_T1 ~~ TH4*Y4_T1
Y5_T1 ~~ TH5*Y5_T1

Y1_T2 ~~ TH1*Y1_T2
Y2_T2 ~~ TH2*Y2_T2
Y3_T2 ~~ TH3*Y3_T2
Y4_T2 ~~ TH4*Y4_T2
Y5_T2 ~~ TH5*Y5_T2

Y1_T3 ~~ TH1*Y1_T3
Y2_T3 ~~ TH2*Y2_T3
Y3_T3 ~~ TH3*Y3_T3
Y4_T3 ~~ TH4*Y4_T3
Y5_T3 ~~ TH5*Y5_T3

Y1_T4 ~~ TH1*Y1_T4
Y2_T4 ~~ TH2*Y2_T4
Y3_T4 ~~ TH3*Y3_T4
Y4_T4 ~~ TH4*Y4_T4
Y5_T4 ~~ TH5*Y5_T4

Y1_T5 ~~ TH1*Y1_T5
Y2_T5 ~~ TH2*Y2_T5
Y3_T5 ~~ TH3*Y3_T5
Y4_T5 ~~ TH4*Y4_T5
Y5_T5 ~~ TH5*Y5_T5

# autoregression, equal over time
fT2 ~ BE*fT1
fT3 ~ BE*fT2
fT4 ~ BE*fT3
fT5 ~ BE*fT4

# variance eta1 and process error variances
fT1 ~~ 1*fT1  # identification/scaling eta1=1
fT2 ~~ label("PS")* fT2
fT3 ~~ label("PS")* fT3
fT4 ~~ label("PS")* fT4
fT5 ~~ label("PS")* fT5

###### latent trend variable ########
# loadings on eta
lint1 =~ 1*fT2
lint2 =~ 1*fT3
lint3 =~ 1*fT4
lint4 =~ 1*fT5

# AR across time
lint2 ~ 1*lint1
lint3 ~ 1*lint2
lint4 ~ 1*lint3

# variances
lint1 ~~ 0*lint1
lint2 ~~ 0*lint2
lint3 ~~ 0*lint3
lint4 ~~ 0*lint4

# intercepts
lint1 ~ lt*1
lint2 ~ lt*1
lint3 ~ lt*1
lint4 ~ lt*1

'
fitlavaan <- lavaan(Model , estimator = "ML", data=Y, auto.fix.first=F )
summary( fitlavaan )

lavR <- lavPredict(fitlavaan, type="lv",  method="regression", se="standard", label=TRUE)[, 1:Tpoints]

# Illustrate the problem:
# plot adjusted and unadjusted OpenMx scores, lavaan scores and true scores
#--------------------------------------------------------------------------------------
Time <- 1:5 ; NJ=30
# plot some trajectories
for(j in (1:NJ)) { #
par(xpd=TRUE)
plot(Time, f.true[j,], type="l", col="red", main=paste("Regression Method in an AR1 \n with trend for j =",j),
lwd=3, ylim=c(0,11), lty=1,
ylab="true score")
lines(Time, omxRplusmeansf[j,],col="blue", lwd=2, lty=2)
lines(Time, omxR[j,],col="darkgreen", lwd=2, lty=3)
lines(Time, lavR[j,],col="orange", lwd=2, lty=4)

legend( "top", inset=c(0,0.05),
c('true', 'oxR+m' , 'oxR', 'lR' ),
lwd=c(3,2,2,2),col=c('red', 'blue', 'darkgreen', 'orange' ),
lty=c(1,2,3,4), bg="white", horiz=T,  box.lty = 0)
}                                                                                    

I'd be grateful for any hint.
Many thanks!

Offline
Joined: 01/24/2014 - 12:15
Thank you for testing!

Thank you for testing! You found a bug. I just pushed a commit to fix it.

Offline
Joined: 06/05/2019 - 05:34
Ah, OK, many thanks for

Ah, OK, many thanks for checking and for your fast response! Glad I could help.

Offline
Joined: 01/24/2014 - 12:15
Thank YOU for posting about

Thank YOU for posting about the bug, and providing a reproducible example.