factor scores curiosity -- comparing OpenMx and lavaan

Posted on Tue, 06/11/2019 - 12:51

kaje Joined: 06/05/2019

Forums

Hi,
when estimating a very simple latent variable autoregressive panel (AR(1)) model with an integrated trend component, I noticed that the factor scores based on the regression method do not include the trend (or maybe they are a kind of detrended). I'm wondering whether there maybe is a reason or a rationale behind this that I don't see. If you add the model implied means to the factor scores, you get the same factor scores as when using lavaan.

Here is a minimal working example:

# input parameters #----------------- J=2000 # sample size Tpoints=I=5 # time points und Items P=1 beta=BE=0.75 # AR coefficient PS=1-beta**2 # process noise PH=1 # initial variance of f (at t1) LD=matrix(c(0.65,0.75,0.80,0.85,0.95),Tpoints,P) # loadings TH=diag(1-as.vector(LD)**2) # measurement error variance trend <- 1


# generate true scores f

#-----------------------

set.seed(666666)             
# f at t1

f1 <- matrix( NA, J, 1)

f1[,1] <- rnorm( n=J, mean=0, sd= sqrt(PH) )     
# final f matrix

f.true <- matrix( NA , nrow=J, ncol=Tpoints )

colnames(f.true) <-  paste0("T", formatC( 1:(Tpoints), format="fg", width=nchar(as.character(Tpoints)), flag=0 ) )  
for(j in 1:J) {

  f <- matrix( NA , nrow=Tpoints, ncol=P )

  for(t in 1:(Tpoints)){

    if(t %% Tpoints==1){

      f[t,] <- f1[j,]

    }else{

      f[t,] <- (t-1)*trend + BE %*% f[t-1,]  + rnorm( n= 1, mean = 0, sd = sqrt( PS ) )

    }

  }

  f.true[j,] <-  f

}                         
# generate responses Y

# --------------------

Y <- NULL  # J x I*Tpoints dataset                     
for(j in 1:J) {

  # observed

  Y1 <- matrix( NA, nrow=Tpoints, ncol=I )

  Y2 <- NULL                                           
  # var/cov matrix of epsilons, measurement error)

  EPSILON <- matrix( NA, nrow=Tpoints, ncol=I )        
  for(t in 1:(Tpoints)){ # n=1; t=1

    EPSILON[t,] <- sapply( diag(TH), function(variance) rnorm( n=1, mean=0, sd=sqrt(variance) ) )

    Y1[t,] <- f.true[j,t] %*% t(LD) + EPSILON[t,]

    Y2 <- cbind(Y2, matrix( Y1[t,], nrow=1, ncol=I, dimnames=list(NULL, c(paste0("Y", 1:I, "_", "T", t)))  ) )

  }

  Y <- as.data.frame( rbind(Y, Y2 ) , drop=F)

}     
# model estimation in OpenMx

#---------------------------

dataset=Y; I=Tpoints=5; P=1;PH=1; psilab=rep("PS",Tpoints-1)               
# load OpenMx

library(OpenMx)

# packageVersion("OpenMx") # [1] '2.12.2'                                  
# use NPSOL as optimizer

if ( options()$mxOption$'Default optimizer'!='NPSOL'){

  mxOption(NULL, "Default optimizer", 'NPSOL')

}                                                                          
modelname <- "AR1_with_linear_trend"

latVars <- c( paste0("f",1:Tpoints),paste0("lint",1:(Tpoints-1))  )        
ARmodelMX <- 	mxModel(

  name=modelname,

  mxData( dataset, type="raw", means=NA, numObs = nrow(dataset) ),

  manifestVars = c( colnames(dataset) ),

  latentVars = latVars,

  type="RAM",

  # measurement model

  mxPath(from="f1",to=c("Y1_T1", "Y2_T1", "Y3_T1", "Y4_T1", "Y5_T1","f2"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),

  mxPath(from="f2",to=c("Y1_T2", "Y2_T2", "Y3_T2", "Y4_T2", "Y5_T2","f3"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),

  mxPath(from="f3",to=c("Y1_T3", "Y2_T3", "Y3_T3", "Y4_T3", "Y5_T3","f4"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),

  mxPath(from="f4",to=c("Y1_T4", "Y2_T4", "Y3_T4", "Y4_T4", "Y5_T4","f5"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),

  mxPath(from="f5",to=c("Y1_T5", "Y2_T5", "Y3_T5", "Y4_T5", "Y5_T5"),      free=c(T,T,T,T,T),

         arrows=1, label=c("LD1","LD2","LD3","LD4","LD5") ) ,

  # latent trend variables

  mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("f2", "f3", "f4", "f5" ), free=FALSE, value=1 , arrows=1 ) ,

  mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("lint1", "lint2", "lint3", "lint4"), free=FALSE, value=c(0) , arrows=2 ) ,

  mxPath(from=c("lint1", "lint2", "lint3"),         to=c("lint2", "lint3", "lint4"),          free=FALSE, value=c(1) , arrows=1 ) ,

  # disturbances

  mxPath(from=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"),to=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"), free=T, arrows=2, label=c("TH1") ),

  mxPath(from=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"),to=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"), free=T, arrows=2, label=c("TH2") ),

  mxPath(from=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"),to=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"), free=T, arrows=2, label=c("TH3") ),

  mxPath(from=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"),to=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"), free=T, arrows=2, label=c("TH4") ),

  mxPath(from=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"),to=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"), free=T, arrows=2, label=c("TH5") ),                 
  # (error) variances of f

  mxPath(from="f1",to="f1", free=FALSE , value=PH , arrows=2, label="PH" ),

  mxPath(from="f2",to="f2", free=TRUE ,  arrows=2, label=psilab[1] ),

  mxPath(from="f3",to="f3", free=TRUE ,  arrows=2, label=psilab[2] ),

  mxPath(from="f4",to="f4", free=TRUE ,  arrows=2, label=psilab[3] ),

  mxPath(from="f5",to="f5", free=TRUE ,  arrows=2, label=psilab[4] ),

  # intercepte auf 0

  mxPath(from="one",to=colnames(dataset), free=FALSE, value=0, arrows=1),

  mxPath(from="one",to=latVars, free=c(FALSE,FALSE,FALSE,FALSE,FALSE, TRUE,TRUE,TRUE,TRUE),

         value=c(0,0,0,0,0,  0,0,0,0),

         arrows=1,

         label=c("noint","noint","noint","noint","noint",  "lt", "lt", "lt", "lt")  )

 )                                                                                                                                                          
# fit model to the data

#-----------------------

fitMx	<- mxRun( ARmodelMX)

summary(fitMx)                                                                                                                                              
# Model implied means

#--------------------

#  B matrix

B <- matrix(0,9,9)

B[2,1] <- B[3,2] <- B[4,3] <- B[5,4] <- fitMx@output$estimate["BE"]

B[2,6] <- B[3,7] <- B[4,8] <- B[5,9] <- 1

B[7,6] <- B[8,7] <- B[9,8]  <- 1                                                                                                                            
# model implied means

meanslat <- solve( diag(1,ncol(B)) - B) %*% matrix(c(rep(0,Tpoints), rep(fitMx@output$estimate["lt"], Tpoints-1)), ncol=1)

meansf <- as.matrix( meanslat[1:Tpoints,], ncol=1)                                                                                                          
# compute regression method based individual scores via OpenMx

#-------------------------------------------------------------

omxR <- mxFactorScores(fitMx, type='Regression')[,1:Tpoints,1]                                                                                              
# add means to the individual score estimates (adjusted scores)

#---------------------------------------------------------------

omxRplusmeansf <- matrix(NA, nrow(omxR), ncol(omxR))                                                                                                        
for (t in 1:ncol(omxRplusmeansf)) {

  omxRplusmeansf[,t] <- omxR[,t] + meansf[t,]

}                                                                                                                                                           
# generate regression based scores using lavaan for comparison

#--------------------------------------------------------------                                                                                             
################ model estimation ################                                                                                                          
library(lavaan)                                                                                                                                             
# Model estimation as structural equation model

Model <- '

# measurement models

fT1 =~ LD1*Y1_T1 + LD2*Y2_T1 + LD3*Y3_T1 + LD4*Y4_T1 + LD5*Y5_T1

fT2 =~ LD1*Y1_T2 + LD2*Y2_T2 + LD3*Y3_T2 + LD4*Y4_T2 + LD5*Y5_T2

fT3 =~ LD1*Y1_T3 + LD2*Y2_T3 + LD3*Y3_T3 + LD4*Y4_T3 + LD5*Y5_T3

fT4 =~ LD1*Y1_T4 + LD2*Y2_T4 + LD3*Y3_T4 + LD4*Y4_T4 + LD5*Y5_T4

fT5 =~ LD1*Y1_T5 + LD2*Y2_T5 + LD3*Y3_T5 + LD4*Y4_T5 + LD5*Y5_T5                                                                                            
# measurement errors

Y1_T1 ~~ TH1*Y1_T1

Y2_T1 ~~ TH2*Y2_T1

Y3_T1 ~~ TH3*Y3_T1

Y4_T1 ~~ TH4*Y4_T1

Y5_T1 ~~ TH5*Y5_T1                                                                                                                                          
Y1_T2 ~~ TH1*Y1_T2

Y2_T2 ~~ TH2*Y2_T2

Y3_T2 ~~ TH3*Y3_T2

Y4_T2 ~~ TH4*Y4_T2

Y5_T2 ~~ TH5*Y5_T2                                                                                                                                          
Y1_T3 ~~ TH1*Y1_T3

Y2_T3 ~~ TH2*Y2_T3

Y3_T3 ~~ TH3*Y3_T3

Y4_T3 ~~ TH4*Y4_T3

Y5_T3 ~~ TH5*Y5_T3                                                                                                                                          
Y1_T4 ~~ TH1*Y1_T4

Y2_T4 ~~ TH2*Y2_T4

Y3_T4 ~~ TH3*Y3_T4

Y4_T4 ~~ TH4*Y4_T4

Y5_T4 ~~ TH5*Y5_T4                                                                                                                                          
Y1_T5 ~~ TH1*Y1_T5

Y2_T5 ~~ TH2*Y2_T5

Y3_T5 ~~ TH3*Y3_T5

Y4_T5 ~~ TH4*Y4_T5

Y5_T5 ~~ TH5*Y5_T5                                                                                                                                          
# autoregression, equal over time

fT2 ~ BE*fT1

fT3 ~ BE*fT2

fT4 ~ BE*fT3

fT5 ~ BE*fT4                                                                                                                                                
# variance eta1 and process error variances

fT1 ~~ 1*fT1  # identification/scaling eta1=1

fT2 ~~ label("PS")* fT2

fT3 ~~ label("PS")* fT3

fT4 ~~ label("PS")* fT4

fT5 ~~ label("PS")* fT5                                                                                                                                     
###### latent trend variable ########

# loadings on eta

lint1 =~ 1*fT2

lint2 =~ 1*fT3

lint3 =~ 1*fT4

lint4 =~ 1*fT5                                                                                                                                              
# AR across time

lint2 ~ 1*lint1

lint3 ~ 1*lint2

lint4 ~ 1*lint3                                                                                                                                             
# variances

lint1 ~~ 0*lint1

lint2 ~~ 0*lint2

lint3 ~~ 0*lint3

lint4 ~~ 0*lint4                                                                                                                                            
# intercepts

lint1 ~ lt*1

lint2 ~ lt*1

lint3 ~ lt*1

lint4 ~ lt*1                                                                                                                                                
'

fitlavaan <- lavaan(Model , estimator = "ML", data=Y, auto.fix.first=F )

summary( fitlavaan )                                                                                                                                        
lavR <- lavPredict(fitlavaan, type="lv",  method="regression", se="standard", label=TRUE)[, 1:Tpoints]                                                      
# Illustrate the problem:

# plot adjusted and unadjusted OpenMx scores, lavaan scores and true scores

#--------------------------------------------------------------------------------------

Time <- 1:5 ; NJ=30

# plot some trajectories

for(j in (1:NJ)) { #

  par(xpd=TRUE)

  plot(Time, f.true[j,], type="l", col="red", main=paste("Regression Method in an AR1 \n with trend for j =",j),

       lwd=3, ylim=c(0,11), lty=1,

       ylab="true score")

  lines(Time, omxRplusmeansf[j,],col="blue", lwd=2, lty=2)

  lines(Time, omxR[j,],col="darkgreen", lwd=2, lty=3)

  lines(Time, lavR[j,],col="orange", lwd=2, lty=4)                                   
  legend( "top", inset=c(0,0.05),

          c('true', 'oxR+m' , 'oxR', 'lR' ),

          lwd=c(3,2,2,2),col=c('red', 'blue', 'darkgreen', 'orange' ),

          lty=c(1,2,3,4), bg="white", horiz=T,  box.lty = 0)

}

I'd be grateful for any hint.
Many thanks!

Replied on Tue, 06/11/2019 - 15:19

AdminRobK Joined: Jan 24, 2014

Thank you for testing!

Thank you for testing! You found a bug. I just pushed a commit to fix it.

Log in or register to post comments

Replied on Fri, 06/14/2019 - 16:36

kaje Joined: Jun 05, 2019

Ah, OK, many thanks for

Ah, OK, many thanks for checking and for your fast response! Glad I could help.

Log in or register to post comments

Replied on Fri, 06/14/2019 - 18:17

News

Recent Posts

factor scores curiosity -- comparing OpenMx and lavaan

Thank you for testing!

Ah, OK, many thanks for

Thank YOU for posting about

News

Recent Posts