Hi,
Thanks to the script below (suggested by Athanassios Protopapas and further developed by Paolo Ghisletta, thank you!!!) I was able to obtain a large number of fit indices, but I still don't know how to get (a) the 90% confidence interval for RMSEA, (b) p close (the test of the null hypothesis that RMSEA (in the population) in less than .05), and (c) the residual correlation matrix. Can anyone help me?
If this is not too much to ask, I would also be interested in getting (d) an "effect decomposition" (i.e., a tabular summary of estimated direct, indirect, and total effects, and either their standard error or their significance level) and (e) the proportion of variance explained in each of the endogenous variables.
If there is documentation on how to get this output, please let me know where to find it. Thank you,
-- M
fit.cov <- function(indepfit,modelfit) {
options(scipen=3)
indep <- summary(indepfit)
model <- summary(modelfit)
deviance <- model$Minus2LogLikelihood
Chi <- model$Chi
indep.chi <- indep$Chi
df <- model$degreesOfFreedom
p.Chi <- 1 - pchisq(Chi, df)
Chi.df <- Chi/df
indep.df <- indep$degreesOfFreedom
N <- model$numObs
N.parms <- model$estimatedParameters
N.manifest <- length(modelfit@manifestVars)
q <- (N.manifest(N.manifest+1))/2
N.latent <- length(modelfit@latentVars)
observed.cov <- modelfit@data@observed
observed.cor <- cov2cor(observed.cov)
A <- modelfit@matrices$A@values
S <- modelfit@matrices$S@values
F <- modelfit@matrices$F@values
I <- diag(N.manifest+N.latent)
estimate.cov <- F %% (qr.solve(I-A)) %% S %% (t(qr.solve(I-A))) %% t(F)
estimate.cor <- cov2cor(estimate.cov)
Id.manifest <-diag(N.manifest)
residual.cov <- observed.cov-estimate.cov
residual.cor <- observed.cor-estimate.cor
F0 <- (Chi-df)/(N-1)
if (F0<0) {F0 <- 0}
NFI <- (indep.chi-Chi)/indep.chi
NNFI.TLI <- (indep.chi-indep.df/dfChi)/(indep.chi-indep.df)
PNFI <- (df/indep.df)NFI
RFI <- 1 - (Chi/df) / (indep.chi/indep.df)
IFI <- (indep.chi-Chi)/(indep.chi-df)
CFI <- 1.0-(Chi-df)/(indep.chi-indep.df)
if (CFI>1) {CFI=1}
PRATIO <- df/indep.df
PCFI <- PRATIOCFI
NCP <- max((Chi-df),0)
RMSEA<- sqrt(F0/df) # need confidence intervals
MFI <- exp(-0.5(Chi-df)/N)
GH <- N.manifest / (N.manifest+2((Chi-df)/(N-1)))
GFI <- 1-(sum(diag(((solve(estimate.cor)%%observed.cor)-Id.manifest)%%((solve(estimate.cor)%%observed.cor)-Id.manifest))) / sum(diag((solve(estimate.cor)%%observed.cor)%%(solve(estimate.cor)%%observed.cor))))
AGFI <- 1 - (q/df)(1-GFI)
PGFI <- GFI * df/q
AICchi <- Chi+2N.parms
AICdev <- deviance+2N.parms
BCCchi <- Chi + 2N.parms/(N-N.manifest-2)
BCCdev <- deviance + 2N.parms/(N-N.manifest-2)
BICchi <- Chi+N.parmslog(N)
BICdev <- deviance+N.parmslog(N)
CAICchi <- Chi+N.parms(log(N)+1)
CAICdev <- deviance+N.parms(log(N)+1)
ECVIchi <- 1/NAICchi
ECVIdev <- 1/NAICdev
MECVIchi <- 1/BCCchi
MECVIdev <- 1/BCCdev
RMR <- sqrt((2sum(residual.cov^2))/(2q))
SRMR <- sqrt((2sum(residual.cor^2))/(2*q))
indices <-
rbind(N,deviance,N.parms,Chi,df,p.Chi,Chi.df,AICchi,AICdev,BCCchi,BCCdev,BICchi,BICdev,CAICchi,CAICdev,RMSEA,SRMR,RMR,GFI,AGFI,PGFI,NFI,RFI,IFI,NNFI.TLI,CFI,PRATIO,PNFI,PCFI,NCP,ECVIchi,ECVIdev,MECVIchi,MECVIdev,MFI,GH)
return(indices)
}
fit.cov(indepfit,modelfit)
P.S.: For information, here is the entire R script in RAM notation I used.
Prelim stuff, create the covariance matrix
data_apgar <- read.spss("data_apgar.sav", to.data.frame = T)
manifests <- c ("apgar", "gestat", "smokes", "wgtgain")
data_subset<-data_apgar[manifests]
covmatrix<-cov(data_subset)
Run "our" model
apgar_model2 <- mxModel ("Apgar Model 2",
type="RAM",
manifestVars = manifests,
mxPath(from="smokes", to="gestat", values=.1, label="b"),
mxPath(from="wgtgain", to="apgar", values=.1, label="e"),
mxPath(from="gestat", to="apgar", values=.1, label="f"),
mxPath(from="smokes", to="wgtgain", arrows=2, values=.5, label="a"),
mxPath(from=manifests, arrows=2, free=TRUE, values=1, labels=c("da","dg","vars","varw")),
mxCI(c("a", "b", "e", "f", "vars", "varw", "dg", "da")),
mxData(covmatrix, type="cov", numObs = 60)
)
modelfit <- mxRun(apgar_model2, intervals=TRUE)
summary(modelfit)
Run the independence model
indep.model <- mxModel ("Indep",
type="RAM",
manifestVars = manifests,
mxPath(from=manifests, arrows=2, free=TRUE, values=1, labels=c("vara","varg","vars","varw")),
mxCI(c("vara","varg","vars","varw")),
mxData(covmatrix, type="cov", numObs = 60)
)
indepfit <- mxRun(indep.model, intervals=TRUE)
summary(indepfit)
Paolo's script for covariance input
fit.cov <- function(indepfit,modelfit) {
options(scipen=3)
indep <- summary(indepfit)
model <- summary(modelfit)
deviance <- model$Minus2LogLikelihood
Chi <- model$Chi
indep.chi <- indep$Chi
df <- model$degreesOfFreedom
p.Chi <- 1 - pchisq(Chi, df)
Chi.df <- Chi/df
indep.df <- indep$degreesOfFreedom
N <- model$numObs
N.parms <- model$estimatedParameters
N.manifest <- length(modelfit@manifestVars)
q <- (N.manifest(N.manifest+1))/2
N.latent <- length(modelfit@latentVars)
observed.cov <- modelfit@data@observed
observed.cor <- cov2cor(observed.cov)
A <- modelfit@matrices$A@values
S <- modelfit@matrices$S@values
F <- modelfit@matrices$F@values
I <- diag(N.manifest+N.latent)
estimate.cov <- F %% (qr.solve(I-A)) %% S %% (t(qr.solve(I-A))) %% t(F)
estimate.cor <- cov2cor(estimate.cov)
Id.manifest <-diag(N.manifest)
residual.cov <- observed.cov-estimate.cov
residual.cor <- observed.cor-estimate.cor
F0 <- (Chi-df)/(N-1)
if (F0<0) {F0 <- 0}
NFI <- (indep.chi-Chi)/indep.chi
NNFI.TLI <- (indep.chi-indep.df/dfChi)/(indep.chi-indep.df)
PNFI <- (df/indep.df)NFI
RFI <- 1 - (Chi/df) / (indep.chi/indep.df)
IFI <- (indep.chi-Chi)/(indep.chi-df)
CFI <- 1.0-(Chi-df)/(indep.chi-indep.df)
if (CFI>1) {CFI=1}
PRATIO <- df/indep.df
PCFI <- PRATIOCFI
NCP <- max((Chi-df),0)
RMSEA<- sqrt(F0/df) # need confidence intervals
MFI <- exp(-0.5(Chi-df)/N)
GH <- N.manifest / (N.manifest+2((Chi-df)/(N-1)))
GFI <- 1-(sum(diag(((solve(estimate.cor)%%observed.cor)-Id.manifest)%%((solve(estimate.cor)%%observed.cor)-Id.manifest))) / sum(diag((solve(estimate.cor)%%observed.cor)%%(solve(estimate.cor)%%observed.cor))))
AGFI <- 1 - (q/df)(1-GFI)
PGFI <- GFI * df/q
AICchi <- Chi+2N.parms
AICdev <- deviance+2N.parms
BCCchi <- Chi + 2N.parms/(N-N.manifest-2)
BCCdev <- deviance + 2N.parms/(N-N.manifest-2)
BICchi <- Chi+N.parmslog(N)
BICdev <- deviance+N.parmslog(N)
CAICchi <- Chi+N.parms(log(N)+1)
CAICdev <- deviance+N.parms(log(N)+1)
ECVIchi <- 1/NAICchi
ECVIdev <- 1/NAICdev
MECVIchi <- 1/BCCchi
MECVIdev <- 1/BCCdev
RMR <- sqrt((2sum(residual.cov^2))/(2q))
SRMR <- sqrt((2sum(residual.cor^2))/(2*q))
indices <-
rbind(N,deviance,N.parms,Chi,df,p.Chi,Chi.df,AICchi,AICdev,BCCchi,BCCdev,BICchi,BICdev,CAICchi,CAICdev,RMSEA,SRMR,RMR,GFI,AGFI,PGFI,NFI,RFI,IFI,NNFI.TLI,CFI,PRATIO,PNFI,PCFI,NCP,ECVIchi,ECVIdev,MECVIchi,MECVIdev,MFI,GH)
return(indices)
}
fit.cov(indepfit,modelfit)
I don't know of anyone who has written such a script. From the absence of replies to this thread, I think you may be able to conclude that no one else knows of one. This is your chance to make a contribution to the community!
I created a solution by porting Yves Rosseel's lavaan implementation to OpenMx. This needs to be tested against the results of other packages and could use some more pretty printing and error handling, I guess. For a model
model
, the output looks like this:omxRMSEA(model)
$RMSEA
[1] 0.1131137
$RMSEA.lower
[1] 0.08584368
$RMSEA.upper
[1] 0.1429328
$CI.lower
[1] 0.05
$CI.upper
[1] 0.95
With
RMSEA.lower
andRMSEA.upper
being the lower and upper RMSEA estimates of the CI interval specified byCI.lower
andCI.upper
, in this example 5% and 95%.This is the function:
omxRMSEA<- function(model, ci.lower=.05, ci.upper=.95) {
sm <- summary(model)
if (is.na(sm$Chi)) return(NA);
X2 <- sm$Chi
df <- sm$degreesOfFreedom
N <- sm$numObs
lower.lambda <- function(lambda) {
(pchisq(X2, df=df, ncp=lambda) - ci.upper)
}
lambda.l <- try(uniroot(f=lower.lambda, lower=0, upper=X2)$root,
silent=TRUE)
upper.lambda <- function(lambda) {
(pchisq(X2, df=df, ncp=lambda) - ci.lower)
}
N.RMSEA <- max(N, X2*4) # heuristic of lavaan. TODO: can we improve this? when does this break?
lambda.u <- try(uniroot(f=upper.lambda, lower=0,upper=N.RMSEA)$root,
silent=TRUE)
rmsea.lower <- sqrt(lambda.l/(N*df))
rmsea.upper <- sqrt(lambda.u/(N*df))
return(list(RMSEA=sqrt( max( c((X2/N)/df - 1/N, 0) ) ), RMSEA.lower=rmsea.lower, RMSEA.upper=rmsea.upper, CI.lower=ci.lower, CI.upper=ci.upper))
}
Great job Andy!
In terms of cross validation, sem returns the RMSEA CIs also... for comparison.
http://stackoverflow.com/questions/14910101/goodness-of-fit-indices-in-r
As a quick cross validation, I compared the CIs to those of Mplus for a simple factor model and it came out the same.
How do multi group RMSEA's work, especially if the number of variables and sample size differs between groups?
Steiger, J. H. (1998). A note on multiple sample extensions of the RMSEA fit index. Structural Equation Modeling, 5(4), 411-419. doi: 10.1080/10705519809540115
Looking at the draft RMSEA CI95 function (thanks!), what error checking is needed to get it working for this example?