Hi,
when estimating a very simple latent variable autoregressive panel (AR(1)) model with an integrated trend component, I noticed that the factor scores based on the regression method do not include the trend (or maybe they are a kind of detrended). I'm wondering whether there maybe is a reason or a rationale behind this that I don't see. If you add the model implied means to the factor scores, you get the same factor scores as when using lavaan.
Here is a minimal working example:
# input parameters #----------------- J=2000 # sample size Tpoints=I=5 # time points und Items P=1 beta=BE=0.75 # AR coefficient PS=1-beta**2 # process noise PH=1 # initial variance of f (at t1) LD=matrix(c(0.65,0.75,0.80,0.85,0.95),Tpoints,P) # loadings TH=diag(1-as.vector(LD)**2) # measurement error variance trend <- 1 # generate true scores f #----------------------- set.seed(666666) # f at t1 f1 <- matrix( NA, J, 1) f1[,1] <- rnorm( n=J, mean=0, sd= sqrt(PH) ) # final f matrix f.true <- matrix( NA , nrow=J, ncol=Tpoints ) colnames(f.true) <- paste0("T", formatC( 1:(Tpoints), format="fg", width=nchar(as.character(Tpoints)), flag=0 ) ) for(j in 1:J) { f <- matrix( NA , nrow=Tpoints, ncol=P ) for(t in 1:(Tpoints)){ if(t %% Tpoints==1){ f[t,] <- f1[j,] }else{ f[t,] <- (t-1)*trend + BE %*% f[t-1,] + rnorm( n= 1, mean = 0, sd = sqrt( PS ) ) } } f.true[j,] <- f } # generate responses Y # -------------------- Y <- NULL # J x I*Tpoints dataset for(j in 1:J) { # observed Y1 <- matrix( NA, nrow=Tpoints, ncol=I ) Y2 <- NULL # var/cov matrix of epsilons, measurement error) EPSILON <- matrix( NA, nrow=Tpoints, ncol=I ) for(t in 1:(Tpoints)){ # n=1; t=1 EPSILON[t,] <- sapply( diag(TH), function(variance) rnorm( n=1, mean=0, sd=sqrt(variance) ) ) Y1[t,] <- f.true[j,t] %*% t(LD) + EPSILON[t,] Y2 <- cbind(Y2, matrix( Y1[t,], nrow=1, ncol=I, dimnames=list(NULL, c(paste0("Y", 1:I, "_", "T", t))) ) ) } Y <- as.data.frame( rbind(Y, Y2 ) , drop=F) } # model estimation in OpenMx #--------------------------- dataset=Y; I=Tpoints=5; P=1;PH=1; psilab=rep("PS",Tpoints-1) # load OpenMx library(OpenMx) # packageVersion("OpenMx") # [1] '2.12.2' # use NPSOL as optimizer if ( options()$mxOption$'Default optimizer'!='NPSOL'){ mxOption(NULL, "Default optimizer", 'NPSOL') } modelname <- "AR1_with_linear_trend" latVars <- c( paste0("f",1:Tpoints),paste0("lint",1:(Tpoints-1)) ) ARmodelMX <- mxModel( name=modelname, mxData( dataset, type="raw", means=NA, numObs = nrow(dataset) ), manifestVars = c( colnames(dataset) ), latentVars = latVars, type="RAM", # measurement model mxPath(from="f1",to=c("Y1_T1", "Y2_T1", "Y3_T1", "Y4_T1", "Y5_T1","f2"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ), mxPath(from="f2",to=c("Y1_T2", "Y2_T2", "Y3_T2", "Y4_T2", "Y5_T2","f3"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ), mxPath(from="f3",to=c("Y1_T3", "Y2_T3", "Y3_T3", "Y4_T3", "Y5_T3","f4"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ), mxPath(from="f4",to=c("Y1_T4", "Y2_T4", "Y3_T4", "Y4_T4", "Y5_T4","f5"), free=c(T,T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5","BE") ), mxPath(from="f5",to=c("Y1_T5", "Y2_T5", "Y3_T5", "Y4_T5", "Y5_T5"), free=c(T,T,T,T,T), arrows=1, label=c("LD1","LD2","LD3","LD4","LD5") ) , # latent trend variables mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("f2", "f3", "f4", "f5" ), free=FALSE, value=1 , arrows=1 ) , mxPath(from=c("lint1", "lint2", "lint3", "lint4"),to=c("lint1", "lint2", "lint3", "lint4"), free=FALSE, value=c(0) , arrows=2 ) , mxPath(from=c("lint1", "lint2", "lint3"), to=c("lint2", "lint3", "lint4"), free=FALSE, value=c(1) , arrows=1 ) , # disturbances mxPath(from=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"),to=c("Y1_T1","Y1_T2","Y1_T3","Y1_T4","Y1_T5"), free=T, arrows=2, label=c("TH1") ), mxPath(from=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"),to=c("Y2_T1","Y2_T2","Y2_T3","Y2_T4","Y2_T5"), free=T, arrows=2, label=c("TH2") ), mxPath(from=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"),to=c("Y3_T1","Y3_T2","Y3_T3","Y3_T4","Y3_T5"), free=T, arrows=2, label=c("TH3") ), mxPath(from=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"),to=c("Y4_T1","Y4_T2","Y4_T3","Y4_T4","Y4_T5"), free=T, arrows=2, label=c("TH4") ), mxPath(from=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"),to=c("Y5_T1","Y5_T2","Y5_T3","Y5_T4","Y5_T5"), free=T, arrows=2, label=c("TH5") ), # (error) variances of f mxPath(from="f1",to="f1", free=FALSE , value=PH , arrows=2, label="PH" ), mxPath(from="f2",to="f2", free=TRUE , arrows=2, label=psilab[1] ), mxPath(from="f3",to="f3", free=TRUE , arrows=2, label=psilab[2] ), mxPath(from="f4",to="f4", free=TRUE , arrows=2, label=psilab[3] ), mxPath(from="f5",to="f5", free=TRUE , arrows=2, label=psilab[4] ), # intercepte auf 0 mxPath(from="one",to=colnames(dataset), free=FALSE, value=0, arrows=1), mxPath(from="one",to=latVars, free=c(FALSE,FALSE,FALSE,FALSE,FALSE, TRUE,TRUE,TRUE,TRUE), value=c(0,0,0,0,0, 0,0,0,0), arrows=1, label=c("noint","noint","noint","noint","noint", "lt", "lt", "lt", "lt") ) ) # fit model to the data #----------------------- fitMx <- mxRun( ARmodelMX) summary(fitMx) # Model implied means #-------------------- # B matrix B <- matrix(0,9,9) B[2,1] <- B[3,2] <- B[4,3] <- B[5,4] <- fitMx@output$estimate["BE"] B[2,6] <- B[3,7] <- B[4,8] <- B[5,9] <- 1 B[7,6] <- B[8,7] <- B[9,8] <- 1 # model implied means meanslat <- solve( diag(1,ncol(B)) - B) %*% matrix(c(rep(0,Tpoints), rep(fitMx@output$estimate["lt"], Tpoints-1)), ncol=1) meansf <- as.matrix( meanslat[1:Tpoints,], ncol=1) # compute regression method based individual scores via OpenMx #------------------------------------------------------------- omxR <- mxFactorScores(fitMx, type='Regression')[,1:Tpoints,1] # add means to the individual score estimates (adjusted scores) #--------------------------------------------------------------- omxRplusmeansf <- matrix(NA, nrow(omxR), ncol(omxR)) for (t in 1:ncol(omxRplusmeansf)) { omxRplusmeansf[,t] <- omxR[,t] + meansf[t,] } # generate regression based scores using lavaan for comparison #-------------------------------------------------------------- ################ model estimation ################ library(lavaan) # Model estimation as structural equation model Model <- ' # measurement models fT1 =~ LD1*Y1_T1 + LD2*Y2_T1 + LD3*Y3_T1 + LD4*Y4_T1 + LD5*Y5_T1 fT2 =~ LD1*Y1_T2 + LD2*Y2_T2 + LD3*Y3_T2 + LD4*Y4_T2 + LD5*Y5_T2 fT3 =~ LD1*Y1_T3 + LD2*Y2_T3 + LD3*Y3_T3 + LD4*Y4_T3 + LD5*Y5_T3 fT4 =~ LD1*Y1_T4 + LD2*Y2_T4 + LD3*Y3_T4 + LD4*Y4_T4 + LD5*Y5_T4 fT5 =~ LD1*Y1_T5 + LD2*Y2_T5 + LD3*Y3_T5 + LD4*Y4_T5 + LD5*Y5_T5 # measurement errors Y1_T1 ~~ TH1*Y1_T1 Y2_T1 ~~ TH2*Y2_T1 Y3_T1 ~~ TH3*Y3_T1 Y4_T1 ~~ TH4*Y4_T1 Y5_T1 ~~ TH5*Y5_T1 Y1_T2 ~~ TH1*Y1_T2 Y2_T2 ~~ TH2*Y2_T2 Y3_T2 ~~ TH3*Y3_T2 Y4_T2 ~~ TH4*Y4_T2 Y5_T2 ~~ TH5*Y5_T2 Y1_T3 ~~ TH1*Y1_T3 Y2_T3 ~~ TH2*Y2_T3 Y3_T3 ~~ TH3*Y3_T3 Y4_T3 ~~ TH4*Y4_T3 Y5_T3 ~~ TH5*Y5_T3 Y1_T4 ~~ TH1*Y1_T4 Y2_T4 ~~ TH2*Y2_T4 Y3_T4 ~~ TH3*Y3_T4 Y4_T4 ~~ TH4*Y4_T4 Y5_T4 ~~ TH5*Y5_T4 Y1_T5 ~~ TH1*Y1_T5 Y2_T5 ~~ TH2*Y2_T5 Y3_T5 ~~ TH3*Y3_T5 Y4_T5 ~~ TH4*Y4_T5 Y5_T5 ~~ TH5*Y5_T5 # autoregression, equal over time fT2 ~ BE*fT1 fT3 ~ BE*fT2 fT4 ~ BE*fT3 fT5 ~ BE*fT4 # variance eta1 and process error variances fT1 ~~ 1*fT1 # identification/scaling eta1=1 fT2 ~~ label("PS")* fT2 fT3 ~~ label("PS")* fT3 fT4 ~~ label("PS")* fT4 fT5 ~~ label("PS")* fT5 ###### latent trend variable ######## # loadings on eta lint1 =~ 1*fT2 lint2 =~ 1*fT3 lint3 =~ 1*fT4 lint4 =~ 1*fT5 # AR across time lint2 ~ 1*lint1 lint3 ~ 1*lint2 lint4 ~ 1*lint3 # variances lint1 ~~ 0*lint1 lint2 ~~ 0*lint2 lint3 ~~ 0*lint3 lint4 ~~ 0*lint4 # intercepts lint1 ~ lt*1 lint2 ~ lt*1 lint3 ~ lt*1 lint4 ~ lt*1 ' fitlavaan <- lavaan(Model , estimator = "ML", data=Y, auto.fix.first=F ) summary( fitlavaan ) lavR <- lavPredict(fitlavaan, type="lv", method="regression", se="standard", label=TRUE)[, 1:Tpoints] # Illustrate the problem: # plot adjusted and unadjusted OpenMx scores, lavaan scores and true scores #-------------------------------------------------------------------------------------- Time <- 1:5 ; NJ=30 # plot some trajectories for(j in (1:NJ)) { # par(xpd=TRUE) plot(Time, f.true[j,], type="l", col="red", main=paste("Regression Method in an AR1 \n with trend for j =",j), lwd=3, ylim=c(0,11), lty=1, ylab="true score") lines(Time, omxRplusmeansf[j,],col="blue", lwd=2, lty=2) lines(Time, omxR[j,],col="darkgreen", lwd=2, lty=3) lines(Time, lavR[j,],col="orange", lwd=2, lty=4) legend( "top", inset=c(0,0.05), c('true', 'oxR+m' , 'oxR', 'lR' ), lwd=c(3,2,2,2),col=c('red', 'blue', 'darkgreen', 'orange' ), lty=c(1,2,3,4), bg="white", horiz=T, box.lty = 0) }
I'd be grateful for any hint.
Many thanks!
Thank you for testing! You found a bug. I just pushed a commit to fix it.
Ah, OK, many thanks for checking and for your fast response! Glad I could help.
Thank YOU for posting about the bug, and providing a reproducible example.