You are here

factor scores curiosity -- comparing OpenMx and lavaan

4 posts / 0 new
Last post
kaje's picture
Offline
Joined: 06/05/2019 - 05:34
factor scores curiosity -- comparing OpenMx and lavaan

Hi,
when estimating a very simple latent variable autoregressive panel (AR(1)) model with an integrated trend component, I noticed that the factor scores based on the regression method do not include the trend (or maybe they are a kind of detrended). I'm wondering whether there maybe is a reason or a rationale behind this that I don't see. If you add the model implied means to the factor scores, you get the same factor scores as when using lavaan.

Here is a minimal working example:

# input parameters                                            
#-----------------                                            
J=2000 # sample size                                          
Tpoints=I=5 # time points und Items                           
P=1                                                           
beta=BE=0.75 # AR coefficient                                 
PS=1-beta**2 # process noise                                  
PH=1 # initial variance of f (at t1)                          
LD=matrix(c(0.65,0.75,0.80,0.85,0.95),Tpoints,P) # loadings   
TH=diag(1-as.vector(LD)**2) # measurement error variance      
trend <- 1                                                    
 
 
# generate true scores f     
#-----------------------     
set.seed(666666)             
 
# f at t1                    
f1 <- matrix( NA, J, 1)      
f1[,1] <- rnorm( n=J, mean=0, sd= sqrt(PH) )     
 
# final f matrix                                 
f.true <- matrix( NA , nrow=J, ncol=Tpoints )    
colnames(f.true) <-  paste0("T", formatC( 1:(Tpoints), format="fg", width=nchar(as.character(Tpoints)), flag=0 ) )  
 
for(j in 1:J) {                                  
  f <- matrix( NA , nrow=Tpoints, ncol=P )       
  for(t in 1:(Tpoints)){                         
    if(t %% Tpoints==1){                         
      f[t,] <- f1[j,]                            
    }else{                                       
      f[t,] <- (t-1)*trend + BE %*% f[t-1,]  + rnorm( n= 1, mean = 0, sd = sqrt( PS ) )     
    }                     
  }                       
  f.true[j,] <-  f        
}                         
 
 
# generate responses Y                                 
# --------------------                                 
Y <- NULL  # J x I*Tpoints dataset                     
 
for(j in 1:J) {                                        
  # observed                                           
  Y1 <- matrix( NA, nrow=Tpoints, ncol=I )             
  Y2 <- NULL                                           
 
  # var/cov matrix of epsilons, measurement error)     
  EPSILON <- matrix( NA, nrow=Tpoints, ncol=I )        
 
  for(t in 1:(Tpoints)){ # n=1; t=1                    
    EPSILON[t,] <- sapply( diag(TH), function(variance) rnorm( n=1, mean=0, sd=sqrt(variance) ) )                  
    Y1[t,] <- f.true[j,t] %*% t(LD) + EPSILON[t,]                                                                  
    Y2 <- cbind(Y2, matrix( Y1[t,], nrow=1, ncol=I, dimnames=list(NULL, c(paste0("Y", 1:I, "_", "T", t)))  ) )     
  }                                                                                                                
  Y <- as.data.frame( rbind(Y, Y2 ) , drop=F)                                                                      
}     
 
 
# model estimation in OpenMx                                               
#---------------------------                                               
dataset=Y; I=Tpoints=5; P=1;PH=1; psilab=rep("PS",Tpoints-1)               
 
# load OpenMx                                                              
library(OpenMx)                                                            
# packageVersion("OpenMx") # [1] '2.12.2'                                  
 
# use NPSOL as optimizer                                                   
if ( options()$mxOption$'Default optimizer'!='NPSOL'){                     
  mxOption(NULL, "Default optimizer", 'NPSOL')                             
}                                                                          
 
modelname <- "AR1_with_linear_trend"                                       
latVars <- c( paste0("f",1:Tpoints),paste0("lint",1:(Tpoints-1))  )        
 
ARmodelMX <-   mxModel(                                                   
  name=modelname,                                                          
  mxData( dataset, type="raw", means=NA, numObs = nrow(dataset) ),         
  manifestVars = c( colnames(dataset) ),                                   
  latentVars = latVars,                                                    
  type="RAM",                                                              
  # measurement model                                                      
  mxPath(from="f1",to=c("Y1_T1", "Y2_T1", "Y3_T1", "Y4_T1", "Y5_T1","f2"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),   
  mxPath(from="f2",to=c("Y1_T2", "Y2_T2", "Y3_T2", "Y4_T2", "Y5_T2","f3"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),   
  mxPath(from="f3",to=c("Y1_T3", "Y2_T3", "Y3_T3", "Y4_T3", "Y5_T3","f4"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),   
  mxPath(from="f4",to=c("Y1_T4", "Y2_T4", "Y3_T4", "Y4_T4", "Y5_T4","f5"), free=c(T,T,T,T,T,T),  arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ),   
  mxPath(from="f5",to=c("Y1_T5", "Y2_T5", "Y3_T5", "Y4_T5", "Y5_T5"),      free=c(T,T,T,T,T),                                                               
         arrows=1, label=c("LD1","LD2","LD3","LD4","LD5") ) ,                                                                                               
  # latent trend variables                                                                                                                                  
  mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("f2", "f3", "f4", "f5" ), free=FALSE, value=1 , arrows=1 ) ,                                       
  mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("lint1", "lint2", "lint3", "lint4"), free=FALSE, value=c(0) , arrows=2 ) ,                         
  mxPath(from=c("lint1", "lint2", "lint3"),         to=c("lint2", "lint3", "lint4"),          free=FALSE, value=c(1) , arrows=1 ) ,                         
  # disturbances                                                                                                                                            
  mxPath(from=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"),to=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"), free=T, arrows=2, label=c("TH1") ),                 
  mxPath(from=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"),to=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"), free=T, arrows=2, label=c("TH2") ),                 
  mxPath(from=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"),to=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"), free=T, arrows=2, label=c("TH3") ),                 
  mxPath(from=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"),to=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"), free=T, arrows=2, label=c("TH4") ),                 
  mxPath(from=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"),to=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"), free=T, arrows=2, label=c("TH5") ),                 
 
  # (error) variances of f                                                                                                                                  
  mxPath(from="f1",to="f1", free=FALSE , value=PH , arrows=2, label="PH" ),                                                                                 
  mxPath(from="f2",to="f2", free=TRUE ,  arrows=2, label=psilab[1] ),                                                                                       
  mxPath(from="f3",to="f3", free=TRUE ,  arrows=2, label=psilab[2] ),                                                                                       
  mxPath(from="f4",to="f4", free=TRUE ,  arrows=2, label=psilab[3] ),                                                                                       
  mxPath(from="f5",to="f5", free=TRUE ,  arrows=2, label=psilab[4] ),                                                                                       
  # intercepte auf 0                                                                                                                                        
  mxPath(from="one",to=colnames(dataset), free=FALSE, value=0, arrows=1),                                                                                   
  mxPath(from="one",to=latVars, free=c(FALSE,FALSE,FALSE,FALSE,FALSE, TRUE,TRUE,TRUE,TRUE),                                                                 
         value=c(0,0,0,0,0,  0,0,0,0),                                                                                                                      
         arrows=1,                                                                                                                                          
         label=c("noint","noint","noint","noint","noint",  "lt", "lt", "lt", "lt")  )                                                                       
 )                                                                                                                                                          
 
# fit model to the data                                                                                                                                     
#-----------------------                                                                                                                                    
fitMx   <- mxRun( ARmodelMX)                                                                                                                                
summary(fitMx)                                                                                                                                              
 
 
# Model implied means                                                                                                                                       
#--------------------                                                                                                                                       
#  B matrix                                                                                                                                                 
B <- matrix(0,9,9)                                                                                                                                          
B[2,1] <- B[3,2] <- B[4,3] <- B[5,4] <- fitMx@output$estimate["BE"]                                                                                         
B[2,6] <- B[3,7] <- B[4,8] <- B[5,9] <- 1                                                                                                                   
B[7,6] <- B[8,7] <- B[9,8]  <- 1                                                                                                                            
 
# model implied means                                                                                                                                       
meanslat <- solve( diag(1,ncol(B)) - B) %*% matrix(c(rep(0,Tpoints), rep(fitMx@output$estimate["lt"], Tpoints-1)), ncol=1)                                  
meansf <- as.matrix( meanslat[1:Tpoints,], ncol=1)                                                                                                          
 
 
# compute regression method based individual scores via OpenMx                                                                                              
#-------------------------------------------------------------                                                                                              
omxR <- mxFactorScores(fitMx, type='Regression')[,1:Tpoints,1]                                                                                              
 
 
# add means to the individual score estimates (adjusted scores)                                                                                             
#---------------------------------------------------------------                                                                                            
omxRplusmeansf <- matrix(NA, nrow(omxR), ncol(omxR))                                                                                                        
 
for (t in 1:ncol(omxRplusmeansf)) {                                                                                                                         
  omxRplusmeansf[,t] <- omxR[,t] + meansf[t,]                                                                                                               
}                                                                                                                                                           
 
 
# generate regression based scores using lavaan for comparison                                                                                              
#--------------------------------------------------------------                                                                                             
 
################ model estimation ################                                                                                                          
 
library(lavaan)                                                                                                                                             
 
# Model estimation as structural equation model                                                                                                             
Model <- '                                                                                                                                                  
# measurement models                                                                                                                                        
fT1 =~ LD1*Y1_T1 + LD2*Y2_T1 + LD3*Y3_T1 + LD4*Y4_T1 + LD5*Y5_T1                                                                                            
fT2 =~ LD1*Y1_T2 + LD2*Y2_T2 + LD3*Y3_T2 + LD4*Y4_T2 + LD5*Y5_T2                                                                                            
fT3 =~ LD1*Y1_T3 + LD2*Y2_T3 + LD3*Y3_T3 + LD4*Y4_T3 + LD5*Y5_T3                                                                                            
fT4 =~ LD1*Y1_T4 + LD2*Y2_T4 + LD3*Y3_T4 + LD4*Y4_T4 + LD5*Y5_T4                                                                                            
fT5 =~ LD1*Y1_T5 + LD2*Y2_T5 + LD3*Y3_T5 + LD4*Y4_T5 + LD5*Y5_T5                                                                                            
 
# measurement errors                                                                                                                                        
Y1_T1 ~~ TH1*Y1_T1                                                                                                                                          
Y2_T1 ~~ TH2*Y2_T1                                                                                                                                          
Y3_T1 ~~ TH3*Y3_T1                                                                                                                                          
Y4_T1 ~~ TH4*Y4_T1                                                                                                                                          
Y5_T1 ~~ TH5*Y5_T1                                                                                                                                          
 
Y1_T2 ~~ TH1*Y1_T2                                                                                                                                          
Y2_T2 ~~ TH2*Y2_T2                                                                                                                                          
Y3_T2 ~~ TH3*Y3_T2                                                                                                                                          
Y4_T2 ~~ TH4*Y4_T2                                                                                                                                          
Y5_T2 ~~ TH5*Y5_T2                                                                                                                                          
 
Y1_T3 ~~ TH1*Y1_T3                                                                                                                                          
Y2_T3 ~~ TH2*Y2_T3                                                                                                                                          
Y3_T3 ~~ TH3*Y3_T3                                                                                                                                          
Y4_T3 ~~ TH4*Y4_T3                                                                                                                                          
Y5_T3 ~~ TH5*Y5_T3                                                                                                                                          
 
Y1_T4 ~~ TH1*Y1_T4                                                                                                                                          
Y2_T4 ~~ TH2*Y2_T4                                                                                                                                          
Y3_T4 ~~ TH3*Y3_T4                                                                                                                                          
Y4_T4 ~~ TH4*Y4_T4                                                                                                                                          
Y5_T4 ~~ TH5*Y5_T4                                                                                                                                          
 
Y1_T5 ~~ TH1*Y1_T5                                                                                                                                          
Y2_T5 ~~ TH2*Y2_T5                                                                                                                                          
Y3_T5 ~~ TH3*Y3_T5                                                                                                                                          
Y4_T5 ~~ TH4*Y4_T5                                                                                                                                          
Y5_T5 ~~ TH5*Y5_T5                                                                                                                                          
 
# autoregression, equal over time                                                                                                                           
fT2 ~ BE*fT1                                                                                                                                                
fT3 ~ BE*fT2                                                                                                                                                
fT4 ~ BE*fT3                                                                                                                                                
fT5 ~ BE*fT4                                                                                                                                                
 
# variance eta1 and process error variances                                                                                                                 
fT1 ~~ 1*fT1  # identification/scaling eta1=1                                                                                                               
fT2 ~~ label("PS")* fT2                                                                                                                                     
fT3 ~~ label("PS")* fT3                                                                                                                                     
fT4 ~~ label("PS")* fT4                                                                                                                                     
fT5 ~~ label("PS")* fT5                                                                                                                                     
 
###### latent trend variable ########                                                                                                         
# loadings on eta                                                                                                                                           
lint1 =~ 1*fT2                                                                                                                                              
lint2 =~ 1*fT3                                                                                                                                              
lint3 =~ 1*fT4                                                                                                                                              
lint4 =~ 1*fT5                                                                                                                                              
 
# AR across time                                                                                                                                            
lint2 ~ 1*lint1                                                                                                                                             
lint3 ~ 1*lint2                                                                                                                                             
lint4 ~ 1*lint3                                                                                                                                             
 
# variances                                                                                                                                                 
lint1 ~~ 0*lint1                                                                                                                                            
lint2 ~~ 0*lint2                                                                                                                                            
lint3 ~~ 0*lint3                                                                                                                                            
lint4 ~~ 0*lint4                                                                                                                                            
 
# intercepts                                                                                                                                                
lint1 ~ lt*1                                                                                                                                                
lint2 ~ lt*1                                                                                                                                                
lint3 ~ lt*1                                                                                                                                                
lint4 ~ lt*1                                                                                                                                                
 
'                                                                                                                                                           
fitlavaan <- lavaan(Model , estimator = "ML", data=Y, auto.fix.first=F )                                                                                    
summary( fitlavaan )                                                                                                                                        
 
lavR <- lavPredict(fitlavaan, type="lv",  method="regression", se="standard", label=TRUE)[, 1:Tpoints]                                                      
 
# Illustrate the problem:                                                                                                                                                               
# plot adjusted and unadjusted OpenMx scores, lavaan scores and true scores                                                                                 
#--------------------------------------------------------------------------------------                                                                                                
Time <- 1:5 ; NJ=30                                                                                                                                         
# plot some trajectories                                                                                                                                      
for(j in (1:NJ)) { #                                                                                                                                        
  par(xpd=TRUE)                                                                                                                                             
  plot(Time, f.true[j,], type="l", col="red", main=paste("Regression Method in an AR1 \n with trend for j =",j),                                            
       lwd=3, ylim=c(0,11), lty=1,                                                                                                                          
       ylab="true score")                                                                                                                                   
  lines(Time, omxRplusmeansf[j,],col="blue", lwd=2, lty=2)                                                                                                  
  lines(Time, omxR[j,],col="darkgreen", lwd=2, lty=3)                                                                                                       
  lines(Time, lavR[j,],col="orange", lwd=2, lty=4)                                   
 
  legend( "top", inset=c(0,0.05),                                                    
          c('true', 'oxR+m' , 'oxR', 'lR' ),                                         
          lwd=c(3,2,2,2),col=c('red', 'blue', 'darkgreen', 'orange' ),               
          lty=c(1,2,3,4), bg="white", horiz=T,  box.lty = 0)                         
}                                                                                    

I'd be grateful for any hint.
Many thanks!

AdminRobK's picture
Offline
Joined: 01/24/2014 - 12:15
Thank you for testing!

Thank you for testing! You found a bug. I just pushed a commit to fix it.

kaje's picture
Offline
Joined: 06/05/2019 - 05:34
Ah, OK, many thanks for

Ah, OK, many thanks for checking and for your fast response! Glad I could help.

AdminRobK's picture
Offline
Joined: 01/24/2014 - 12:15
Thank YOU for posting about

Thank YOU for posting about the bug, and providing a reproducible example.