Hello,
I am trying to fit a trivariate model with 3 continuous variables. I am also trying to fit the independent and common pathways models but I do not know if my script is correct.
My models have:
ADE: 27 parameters
Independent pathway model: 27 parameters
Common pathway model: 24 parameters
I think that number of parameters is correct but I am not sure. Also, I have negative values (for example dl_1_1, fl_2_1) and I do not know if it is correct.
Finally, I am not sure how to get the standardized results for this model. would be correct the square of each value? For al_1_1, dl_1_1 and el_1_1, would be square root?
Thank you so much!
Here the script:
# Select Variables for Analysis vars <- c('IN_Ln','CA_Ln','MF_Ln') # list of variables names nv <- 3 # number of variables ntv <- nv*2 # number of total variables selVars <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="") # Select Covariates for Analysis covVars <- c('age', "Sex1" , "Sex2") nc <- 3 # number of covariates ordData <- twinData # Select Data for Analysis mzData <- subset(ordData, Zyg==1| Zyg==3, c(selVars, covVars)) dzData <- subset(ordData, Zyg==2 | Zyg==4| Zyg==5, c(selVars, covVars)) # Generate Descriptive Statistics colMeans(mzData,na.rm=TRUE) colMeans(dzData,na.rm=TRUE) cov(mzData,use="complete") cov(dzData,use="complete") # Set Starting Values svMe <- c(1.2,1.9,1.1) # start value for means svPa <- .4 # start value for path coefficient svPaD <- vech(diag(svPa,nv,nv)) # start values for diagonal of covariance matrix svPe <- .8 # start value for path coefficient for e svPeD <- vech(diag(svPe,nv,nv)) # start values for diagonal of covariance matrix lbPa <- .0001 # start value for lower bounds lbPaD <- diag(lbPa,nv,nv) # lower bounds for diagonal of covariance matrix lbPaD[lower.tri(lbPaD)] <- -10 # lower bounds for below diagonal elements lbPaD[upper.tri(lbPaD)] <- NA # lower bounds for above diagonal elements # Create Labels labMe <- paste("mean",vars,sep="_") labBe <- labFull("beta",nc,1) # ------------------------------------------------------------------------------ # PREPARE MODEL # ADE Model # Create Matrices for Covariates and linear Regression Coefficients defAge <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data.age"), name="Age" ) pathB1 <- mxMatrix( type="Full", nrow=nc, ncol=1, free=TRUE, values=.01, label=c("b11","b12", "b13"), name="b1" ) defSex <- mxMatrix( type="Full", nrow=1, ncol=6, free=FALSE, labels=c("data.Sex1", "data.Sex1", "data.Sex1","data.Sex2", "data.Sex2", "data.Sex2"), name="Sex" ) pathB2 <- mxMatrix( type="Full", nrow=1, ncol=6, free=TRUE, values=.01, label=c("b21", "b22","b23", "b21", "b22", "b23"), name="b2" ) # Create Algebra for expected Mean Matrices meanG <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, values=svMe, labels=labMe, name="meanG" ) expMean <- mxAlgebra( expression= meanG + cbind(t(b1%*%Age),t(b1%*%Age))+b2*Sex, name="expMean" ) # Create Matrices for Path Coefficients pathA <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("a",nv), lbound=lbPaD, name="a" ) pathD <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("d",nv), lbound=lbPaD, name="d" ) pathE <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPeD, label=labLower("e",nv), lbound=lbPaD, name="e" ) # Create Algebra for Variance Components covA <- mxAlgebra( expression=a %*% t(a), name="A" ) covD <- mxAlgebra( expression=d %*% t(d), name="D" ) covE <- mxAlgebra( expression=e %*% t(e), name="E" ) # Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins covP <- mxAlgebra( expression= A+D+E, name="V" ) covMZ <- mxAlgebra( expression= A+D, name="cMZ" ) covDZ <- mxAlgebra( expression= 0.5%x%A+ 0.25%x%D, name="cDZ" ) expCovMZ <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" ) expCovDZ <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" ) # Create Algebra for Standardization matI <- mxMatrix( type="Iden", nrow=nv, ncol=nv, name="I") invSD <- mxAlgebra( expression=solve(sqrt(I*V)), name="iSD") # Calculate genetic and environmental correlations corA <- mxAlgebra( expression=solve(sqrt(I*A))%&%A, name ="rA" ) #cov2cor() corD <- mxAlgebra( expression=solve(sqrt(I*D))%&%D, name ="rD" ) corE <- mxAlgebra( expression=solve(sqrt(I*E))%&%E, name ="rE" ) # Create Data Objects for Multiple Groups dataMZ <- mxData( observed=mzData, type="raw" ) dataDZ <- mxData( observed=dzData, type="raw" ) # Create Expectation Objects for Multiple Groups expMZ <- mxExpectationNormal( covariance="expCovMZ", means="expMean", dimnames=selVars ) expDZ <- mxExpectationNormal( covariance="expCovDZ", means="expMean", dimnames=selVars ) funML <- mxFitFunctionML() # Create Model Objects for Multiple Groups pars <- list(pathB1,pathB2, meanG, matI, invSD, pathA, pathD, pathE, covA, covD, covE, covP, corA, corD, corE) defs <- list(defAge, defSex) modelMZ <- mxModel( name="MZ", pars, defs, expMean, covMZ, expCovMZ, dataMZ, expMZ, funML ) modelDZ <- mxModel( name="DZ", pars, defs, expMean, covDZ, expCovDZ, dataDZ, expDZ, funML ) multi <- mxFitFunctionMultigroup( c("MZ","DZ") ) # Create Algebra for Variance Components rowVC <- rep('VC',nv) colVC <- rep(c('A','D','E','SA','SD','SE'),each=nv) estVC <- mxAlgebra( expression=cbind(A,D,E,A/V,D/V,E/V), name="VC", dimnames=list(rowVC,colVC)) # Create Confidence Interval Objects ciADE <- mxCI(c( "MZ.rA", "MZ.rD", "MZ.rE","MZ.A","MZ.D","MZ.E","VC")) # Build Model with Confidence Intervals modelADE <- mxModel( "twoADEca", pars, modelMZ, modelDZ, multi, estVC, ciADE ) # ------------------------------------------------------------------------------ # RUN MODEL # Run ADE Model fitADE <- mxTryHard( modelADE, intervals=F ) sumADE <- summary( fitADE ) # Compare with Saturated Model mxCompare( fit, fitADE ) # Print Goodness-of-fit Statistics & Parameter Estimates fitGofs(fitADE) fitEsts(fitADE) # ------------------------------------------------------------------------------ # RUN SUBMODELS # Test Significance of Covariates modelCov <- mxModel( fitADE, name="testCov" ) modelCov <- omxSetParameters( modelCov, label=c("b11","b12","b13","b21", "b22","b23"), free=FALSE, values=0 ) fitCov <- mxRun( modelCov ) mxCompare( fitADE, fitCov ) # Run AE model modelAE <- mxModel( fitADE, name="twoAEca" ) modelAE <- omxSetParameters( modelAE, labels=labLower("d",nv), free=FALSE, values=0 ) fitAE <- mxRun( modelAE, intervals=F ) mxCompare( fitADE, fitAE ) fitGofs(fitAE) fitEsts(fitAE) # Run E model modelE <- mxModel( fitAE, name="twoEca" ) modelE <- omxSetParameters( modelE, labels=labLower("a",nv), free=FALSE, values=0 ) fitE <- mxRun( modelE, intervals=T ) mxCompare( fitAE, fitE ) fitGofs(fitE) fitEsts(fitE) # Print Comparative Fit Statistics mxCompare( fitADE, nested <- list(fitCov, fitAE, fitE) ) #round(rbind(fitADE$VC$result,fitAE$VC$result,fitE$VC$result),4) fitADE$algebras mxEval(cov2cor(V), fitADE, T) fitAE$algebras mxEval(cov2cor(V), fitAE, T) #----------------------------------------------- #COMMON AND INDEPENDENT MODELS # Fit Independent Pathway ADE Model # ------------------------------------------------------------------------------ nf <- 1 # number of factors # Matrices ac, cc, and ec to store a, d, and e path coefficients for common factors pathAc <- mxMatrix( type="Full", nrow=nv, ncol=nf, free=TRUE, values=.6, labels=labFull("ac",nv,nf), name="ac" ) pathDc <- mxMatrix( type="Full", nrow=nv, ncol=nf, free=TRUE, values=.6, labels=labFull("dc",nv,nf), name="dc" ) pathEc <- mxMatrix( type="Full", nrow=nv, ncol=nf, free=TRUE, values=.6, labels=labFull("ec",nv,nf), name="ec" ) # Matrices as, ds, and es to store a, d, and e path coefficients for specific factors pathAs <- mxMatrix( type="Diag", nrow=nv, ncol=nv, free=TRUE, values=4, labels=labDiag("as",nv), lbound=.00001, name="as" ) pathDs <- mxMatrix( type="Diag", nrow=nv, ncol=nv, free=TRUE, values=4, labels=labDiag("ds",nv), lbound=.00001, name="ds" ) pathEs <- mxMatrix( type="Diag", nrow=nv, ncol=nv, free=TRUE, values=5, labels=labDiag("es",nv), lbound=.00001, name="es" ) # Matrices A, D, and E compute variance components covA <- mxAlgebra( expression=ac %*% t(ac) + as %*% t(as), name="A" ) covD <- mxAlgebra( expression=dc %*% t(dc) + ds %*% t(ds), name="D" ) covE <- mxAlgebra( expression=ec %*% t(ec) + es %*% t(es), name="E" ) # Create Model Objects for Multiple Groups pars <- list(pathB1, pathB2,meanG, matI, invSD, pathAc, pathDc, pathEc, pathAs, pathDs, pathEs, covA, covD, covE, covP, corA, corD, corE) modelMZ <- mxModel( name="MZ", pars, defs, expMean, covMZ, expCovMZ, dataMZ, expMZ, funML ) modelDZ <- mxModel( name="DZ", pars, defs, expMean, covDZ, expCovDZ, dataDZ, expDZ, funML ) multi <- mxFitFunctionMultigroup( c("MZ","DZ") ) # Build & Run Model modelIP <- mxModel( "mulIPc", pars, modelMZ, modelDZ, multi ) fitIP <- mxRun( modelIP, intervals=F ) sumIP <- summary( fitIP ) mxCompare( fitADE, fitIP ) fitGofs(fitIP) fitEsts(fitIP) parameterSpecifications(fitIP) # Fit Common Pathway ACE Model # ------------------------------------------------------------------------------ nl <- 1 # Matrices ac, cc, and ec to store a, c, and e path coefficients for latent phenotype(s) pathAl <- mxMatrix( type="Lower", nrow=nl, ncol=nl, free=TRUE, values=.6, labels=labLower("al",nl), lbound=.00001, name="al" ) pathDl <- mxMatrix( type="Lower", nrow=nl, ncol=nl, free=TRUE, values=.6, labels=labLower("dl",nl), lbound=.00001, name="dl" ) pathEl <- mxMatrix( type="Lower", nrow=nl, ncol=nl, free=TRUE, values=.6, labels=labLower("el",nl), lbound=.00001, name="el" ) # Matrix and Algebra for constraint on variance of latent phenotype covarLP <- mxAlgebra( expression=al %*% t(al) + dl %*% t(dl) + el %*% t(el), name="CovarLP" ) varLP <- mxAlgebra( expression= diag2vec(CovarLP), name="VarLP" ) unit <- mxMatrix( type="Unit", nrow=nl, ncol=1, name="Unit") varLP1 <- mxConstraint( expression=VarLP == Unit, name="varLP1") # Matrix f for factor loadings on latent phenotype pathFl <- mxMatrix( type="Full", nrow=nv, ncol=nl, free=TRUE, values=.2, labels=labFull("fl",nv,nl), name="fl" ) # Matrices A, C, and E compute variance components covA <- mxAlgebra( expression=fl %&% (al %*% t(al)) + as %*% t(as), name="A" ) covD <- mxAlgebra( expression=fl %&% (dl %*% t(dl)) + ds %*% t(ds), name="D" ) covE <- mxAlgebra( expression=fl %&% (el %*% t(el)) + es %*% t(es), name="E" ) # Create Model Objects for Multiple Groups pars <- list(pathB1, pathB2,meanG, matI, invSD, pathAl, pathDl, pathEl, covarLP, varLP, unit, pathFl, pathAs, pathDs, pathEs, covA, covD, covE, covP) modelMZ <- mxModel( name="MZ", pars,defs, expMean, covMZ, expCovMZ, dataMZ, expMZ, funML ) modelDZ <- mxModel( name="DZ", pars,defs, expMean, covDZ, expCovDZ, dataDZ, expDZ, funML ) multi <- mxFitFunctionMultigroup( c("MZ","DZ") ) # Build & Run Model modelCP <- mxModel( "mulCPc", pars, varLP1, modelMZ, modelDZ, multi ) fitCP <- mxRun(modelCP, intervals=F ) sumCP <- summary( fitCP ) mxCompare( fitADE, fitCP ) parameterSpecifications(fitCP) # ------------------------------------------------------------------------------ sink() save.image(paste(filename,".Ri",sep=""))
The "saturated" ADE model is supposed to have more free parameters than the other two.
I think it might be good for you to try setting up these models with umx ( a high-level wrapper for OpenMx), and only worry about writing a full script if umx doesn't do what you want. Take a look at the attached demo script for umx, from this year's Boulder Workshop.
Thank you so much Robert,
UMX Is awesome. I have tried to do the Independent Pathway model with UMX and I got 9 parameter and 9 parameters (for specific) 18 in total
With my model I have 27 parameters (18 parameters+6covariates +3 means) Here my results:
1 b11 b1 1 1 7.4248605e-02 0.014435780
2 b12 b1 2 1 9.2977914e-03 0.011591250
3 b13 b1 3 1 3.9710355e-02 0.013932112
4 b21 b2 1 1 -2.0584971e-01 0.018878831
5 b22 b2 1 2 -4.1692795e-01 0.014849651
6 b23 b2 1 3 -3.4235173e-01 0.017817483
7 mean_IN_Ln meanG 1 1 1.4105093e-01 0.234235435
8 mean_CA_Ln meanG 1 2 1.9883260e+00 0.189581844
9 mean_MF_Ln meanG 1 3 6.5445748e-01 0.227853485
10 ac_1_1 ac 1 1 -4.2015215e-01 0.035709520
11 ac_2_1 ac 2 1 -3.4143557e-01 0.032853588
12 ac_3_1 ac 3 1 -5.2114878e-01 0.042317199
13 dc_1_1 dc 1 1 -3.3131637e-01 0.049093597
14 dc_2_1 dc 2 1 8.9218977e-02 0.068698639
15 dc_3_1 dc 3 1 2.3962438e-02 0.066020980
16 ec_1_1 ec 1 1 -3.0663489e-01 0.020859518
17 ec_2_1 ec 2 1 -2.8551307e-01 0.017839644
18 ec_3_1 ec 3 1 -3.8998228e-01 0.022629224
19 as_1_1 as 1 1 8.4181968e-07 0.116274385 ! 0!
20 as_2_2 as 2 2 2.9288435e-01 0.051032769 1e-05
21 as_3_3 as 3 3 2.3533050e-01 0.093647771 1e-05
22 ds_1_1 ds 1 1 1.4744784e-05 0.594878427 ! 0!
23 ds_2_2 ds 2 2 1.4553241e-05 0.244555807 ! 0!
24 ds_3_3 ds 3 3 1.0000000e-05 0.104726102 ! 0!
25 es_1_1 es 1 1 5.4550865e-01 0.013016541 1e-05
26 es_2_2 es 2 2 4.7051944e-01 0.011419290 1e-05
27 es_3_3 es 3 3 5.0984167e-01 0.016342803 1e-05
So, I have the same number of parameters for the ADE and IP models am I doing something wrong?
Best.
Now that I think about it...with only 3 phenotypes, you would indeed have the same number of parameters for the IP and vanilla ADE models. They would be alternate parameterizations of one another. Since you're interested in genetic variance shared among traits, I suppose the IP parameterization is easier to interpret in that way.
Give the CP model a try. You should have 24 parameters in the CP model (9 for the model-expected means, 3 for the common-factor variance, 3 common-factor loadings, and 9 for the unique variances).
Note that, for the CP model, umx identifies the scale of the common factor by fixing its variance to zero via an MxConstraint. There are other ways to identify the model that don't require MxConstraints. I mention this because MxConstraints can make optimization more difficult.
Thank you!
You are right the CP has 24 parameters. Attached you can see the result.
I do not know if the results are reasonable. I have negative values for example dl_1_1, fl_2_1 or ac_1_1. I think that is correct but only to be sure. Also, I need to know how to get the standardized value it would be the square of each value?
I am also concern about the total value for A I mean in the ADE model a2 for the first variable is 0.2402 if I want to know the total a2 for this variable in the independent pathway model It would be (ac_1_1)2 + (as_1_1)2 ? but the result of that is 0.18 is this discrepancy normal? Or I am doing something wrong.
Thank you so much and sorry for the inconveniences.
That's OK. Those parameters are common-factor loadings of one kind or another. They're only identified within a change of sign. For instance, in the IP model, you could flip the sign on
ac_1_1
,ac_2_1
, andac_3_1
all at once and not change the model fit.Standardized value of what, specifically?
(ac_1_1)^2 + (as_1_1)^2
gives you the raw additive-genetic variance in the first trait. The value 0.2402 you reference is the additive-genetic variance proportion (a², the standardized additive-genetic variance component). However, I notice that your vanilla ADE model gives an additive-genetic variance of 0.1628 for the first trait, but the IP model gives a value of(-0.4202)^2 + 0^2
, or 0.1766, which is slightly different. In your case, the vanilla ADE and the IP model would be equivalent models. Is the -2logL at the solution the same for both of them? Also, are therelbounds
at zero on some of the free parameters in the IP model?Thank you so much Robert.
In this slide (attached) from one of the courses they calculate the tot-h2 as follow:
(as_1_1)2+(ac_1_1)2 = (0.29)2+(0.64)2=0.08+0.41=0.49
So, I thought it would be the same for my model.
Am I doing something wrong? Are these values (from the slide) previously standardized?
Is this what you want for -2logL?:
> mxCompare( fitADE, fitIP )
base comparison ep minus2LL df AIC diffLL diffdf p
1 twoADEca 27 61202.330 28108 4986.3298 NA NA NA
2 twoADEca mulIPc 27 61204.159 28108 4988.1592 1.8294638 0 NA
How can I check if there are lbounds at zero on some of the free paramenters in the IP model?
Thank you,
Best wishes.
Judging from the title of the slide, I'd say yes, they've been previously standardized. That is, the estimates have been re-scaled to be what they would equal if all variables in the model had unit variance.
So there's a difference in -2logL of almost 2, but they should have equivalent solutions.
The
lbound
s should appear in the parameters table insummary()
output, if there are any.Judging from the title of the slide, I'd say yes, they've been previously standardized. That is, the estimates have
been re-scaled to be what they would equal if all variables in the model had unit variance.
Ok I see. is It common to present the results? I mean for example in the attached figure from one paper (a11)2+(e11)2= 1
But in my output that is not 1. Should I re-scaled before presenting the results?
I would like to know what is the most correct way to present the results that or the raw data?
Here the results from the IP model It seems that there are lbounds
Thank you so much
The standardized path coefficients are probably the preferred way to present the results. Try providing argument
addStd=TRUE
toumxIP()
.I see that there are indeed lbounds that keep the specific path coefficients non-negative. I'm not sure why the lbound is zero for one of the specific "a" paths, and 1e-5 for the other two. The sign on these path coefficients doesn't matter to model fit, so try changing all the lbounds to zero. You can use argument
lbound
toomxSetParameters()
to change parameters' lbounds in your model--something likeThen see if you get the same -2logL as the vanilla ADE model.
Hi Rob,
I have added these lines to the script to change the lbounds:
modelIP2 <- mxModel( fitIP, name="mulIPc2" )
modelIP2 <- omxSetParameters( modelIP2, labels=c("as_1_1","as_2_2","as_3_3","ds_1_1","ds_2_2","ds_3_3","es_1_1","es_2_2","es_3_3"),lbound=0)
fitIP2 <- mxRun( modelIP2)
mxCompare( fitIP, fitIP2 )
fitGofs(fitIP2)
fitEsts(fitIP2)
But I get the same values
What should I do?
Thank you!
Did you try re-running them with
mxTryHard()
? If not, give that a try.Actually, never mind. We talked about this at the OpenMx developers' meeting this morning, and it turns out I was mistaken. In your case, the IP model is actually a more restrictive model than the vanilla ADE model, even though they have the same number of free parameters. So, you should be getting a slightly worse fit from the IP model.
Thank you so much Rob!
It is really helpful for me talking with you and I am learning a lot