Trivariate Cholesky decomposition model
I have a script from SGDP summer school, I can analyze using 2 continuous variables with this script. However, when I try using 3 variables (2 continuous 1 dummy coded), I get stuck while setting up the matrix. Before starting the analysis, I standardized my variables so that the SDs were 1 and their mean was 0.
Part of my code is as follows:
nv <- 3 # number of variables for a twin
ntv <- 2*nv # number of variables for a pair
Vars <- c('bmi', 'age','dummy_x')
selVars <- c('bmi1','age1', 'dummy_x1', 'bmi2','age2', 'dummy_x2')
# Select Data for Analysis
mzData <- subset(TwinData, zygosity=='MZ', selVars)
dzData <- subset(TwinData, zygosity=='DZ', selVars)
# Descriptive Statistics of data by zygosity group
describe(mzData)
describe(dzData)
colMeans(mzData,na.rm=TRUE)
cov(mzData,use="complete")
cor(mzData,use="complete")
colMeans(dzData,na.rm=TRUE)
cov(dzData,use="complete")
cor(dzData,use="complete")
# -----------------------------------------------------------------------------------------------------------------------------------
# Now we estimate the MZ and DZ covariances and Means by fitting a 'model' to the data
# 1a) Specify Bivariate Saturated Model (Cholesky Decomposition)
# -----------------------------------------------------------------------------------------------------------------------------------
# Matrix & Algebra for expected means and covariances
MZlow <-mxMatrix( type="Lower", nrow=ntv, ncol=ntv, free=T, values=5, name="LowMZ" )
CovMZ <-mxAlgebra( expression=LowMZ %*% t(LowMZ), name="ExpCovMZ" )
MeanMZ <-mxMatrix( type="Full", nrow=1, ncol=ntv, free=T, values=c(0,0,0,0,0,0), name="ExpMeanMZ" )
DZlow <-mxMatrix( type="Lower", nrow=ntv, ncol=ntv, free=T, values=5, name="LowDZ" )
CovDZ <-mxAlgebra( expression=LowDZ %*% t(LowDZ), name="ExpCovDZ" )
MeanDZ <-mxMatrix( type="Full", nrow=1, ncol=ntv, free=T, values=c(0,0,0,0,0,0), name="ExpMeanDZ" )
# Data objects for Multiple Groups
dataMZ <- mxData( observed=mzData, type="raw" )
dataDZ <- mxData( observed=dzData, type="raw" )
# Objective objects for Multiple Groups
objMZ <- mxExpectationNormal( covariance="ExpCovMZ", means="ExpMeanMZ", dimnames=selVars)
objDZ <- mxExpectationNormal( covariance="ExpCovDZ", means="ExpMeanDZ", dimnames=selVars)
fitFunction <- mxFitFunctionML()
# Combine Groups
modelMZ <- mxModel( MZlow, CovMZ, MeanMZ, dataMZ, objMZ, fitFunction, name="MZ")
modelDZ <- mxModel( DZlow, CovDZ, MeanDZ, dataDZ, objDZ, fitFunction, name="DZ")
minus2ll <- mxAlgebra( expression=MZ.objective + DZ.objective, name="m2LL" )
obj <- mxFitFunctionAlgebra( "m2LL" )
SatModel <- mxModel( "Sat", modelMZ, modelDZ, minus2ll, obj)
# -------------------------------------------------------------------------------------------------------------------------------
# 1a) RUN Saturated Model (Cholesky Decomposition)
SatFit <- mxRun(SatModel)
(SatSum <- summary(SatFit))
# Generate SatModel Output
round(SatFit@output$estimate,4)
mxEval(MZ.ExpMeanMZ, SatFit)
mxEval(MZ.ExpCovMZ, SatFit)
mxEval(DZ.ExpMeanDZ, SatFit)
mxEval(DZ.ExpCovDZ, SatFit)
# -----------------------------------------------------------------------------------------------------------------------------------
# 1b) Specify Bivariate Saturated Model (Gaussian Decomposition)
# We will use this specification to fit a constrained model
# -----------------------------------------------------------------------------------------------------------------------------------
svM <-c(0,0,0,0,0,0) # these are start values
svSD <-c(1,1,1,1,1,1)
svMZ<-c(-.04,.2,-.4,-.1,.2,-.4)
svDZ <-c(-.03,-.1,-.1,-.1,.5,-.3)
# Matrix & Algebra for expected means and covariances
MeanMZ <-mxMatrix( "Full", 1, ntv, free=T, values=svM, labels=c("MZm11", "MZm21","MZm31","MZm12", "MZm22",'Mzm32'), name="ExpMeanMZ" )
MZsd <-mxMatrix( "Diag", ntv, ntv, free=T, values=svSD,labels=c("MZs11", "MZs21","MZs31","MZs12", "MZs22",'Mzs32'), name="sdMZ" )
Cormz <-mxMatrix( "Stand", ntv, ntv, free=T, values=svMZ, labels=c("rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2"),name="MZCor")
CovMZ <-mxAlgebra( expression=sdMZ %*% MZCor %*% t(sdMZ), name="ExpCovMZ" )
MeanDZ <-mxMatrix( "Full", 1, ntv, free=T, values=svM, labels=c("DZm11", "DZm21", "DZm12", "DZm22"), name="ExpMeanDZ" )
DZsd <-mxMatrix( "Diag", ntv, ntv, free=T, values=svSD, labels=c("DZs11", "DZs21", "DZs12", "DZs22"), name="sdDZ" )
Cordz <-mxMatrix( "Stand", ntv, ntv, free=T, values=svDZ, labels=c("rPhDZ1","rDZ1","DZxtxt1","DZxtxt2","rDZ2","rPhDZ2"),name="DZCor")
CovDZ <-mxAlgebra( expression=sdDZ %*% DZCor %*% t(sdDZ), name="ExpCovDZ" )
First thing I don't understand is how do we determine the svMZ and svDZ values? How many values do we need to enter? Originally there were 6 for 2 variables, in this case do I have to enter 9 values for 3 variables?
Secondly, how do we write the elements of the matrix in the corMZ and corDZ part?
Thanks!
First of all, I notice that
That's not necessary, and I'm not sure it's even a good idea.
They're start values, so they should just be reasonable guesses at what the corresponding statistics are in your datasets. For instance, you could use the output of `cor(mzData,use="complete")` and `cor(dzData,use="complete")` to guide your choice of start values. Remember that, by default, R puts values into matrices in **column-major order**.
No. The number of unique elements in a *k*x*k* correlation matrix is *k*(*k*-1)/2. So, for a 6x6 correlation matrix (for 3 traits and 2 twins), you would need 6(5)/2 = 15 values.
Sorry, I don't understand this question...?
Finally, consider replacing these two lines,
minus2ll <- mxAlgebra( expression=MZ.objective + DZ.objective, name="m2LL" )
obj <- mxFitFunctionAlgebra( "m2LL" )
, with some syntax that uses
mxFitFunctionMultigroup
.Log in or register to post comments
In reply to First of all, I notice that by AdminRobK
Hi Rob!
Thank you for your answers. Well, how can I add it (dummy-coded variable) with the threshold trait. Frankly, I couldn't find how to include a dummy coded variable in the analysis.
Secondly, how do we write the elements of the matrix in the corMZ and corDZ part?
What I meant by this question was how I should write the corDZ and corMZ matrices using the svMZ and svDZ values. I wrote 11 21 31 12 22 32 (for 3 traits and 2 twins) in the MeanMZ line. But I am confused how to write this in CorMZ line. Should it be written as "rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2","rPhMZ3","rMZ3","MZxtxt3".
MeanMZ <-mxMatrix( "Full", 1, ntv, free=T, values=svM, labels=c("MZm11", "MZm21","MZm31","MZm12", "MZm22",'Mzm32'), name="ExpMeanMZ" )
MZsd <-mxMatrix( "Diag", ntv, ntv, free=T, values=svSD,labels=c("MZs11", "MZs21","MZs31","MZs12", "MZs22",'Mzs32'), name="sdMZ" )
Cormz <-mxMatrix( "Stand", ntv, ntv, free=T, values=svMZ, labels=c("rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2"),name="MZCor")
CovMZ <-mxAlgebra( expression=sdMZ %*% MZCor %*% t(sdMZ), name="ExpCovMZ" )
Thank you for your time and interest!
Log in or register to post comments
In reply to Hi Rob! by nsc_95
some advice
Again, the two correlation matrices are 6x6, so they have (at most) 15 unique elements. However, due to the twin-related, intraclass structure of your data, there are only nine unique elements in each, since some elements are constrained equal to each other. You need to define the MZ correlation matrix as something like this,
Cormz <-mxMatrix(
"Stand", ntv, ntv, free=T, values=0.1, dimnames=list(selVars,selVars),
labels=c("rPhMZ12","rPhMZ13","rMZ1","MZxtxt12","MZxtxt13",
"rPhMZ23","MZxtxt12","rMZ2","MZxtxt23",
"MZxtxt13","MZxtxt23","rMZ3",
"rPhMZ12","rPhMZ13",
"rPhMZ23"),
name="MZCor")
, and correspondingly for the DZ correlation matrix. I didn't write the script you're modifying, so I don't know for sure, but I can guess at what the labels are supposed to mean. "rMZ" probably means "MZ correlation for a given phenotype", "rPhMZ" probably means "correlation between two phenotypes as estimated in MZ twins", and "xtxt" almost certainly means "cross-twin, cross-trait".
This script provides an example of a joint analysis of one continuous and one threshold trait.
Log in or register to post comments
Thanks! I tried the script
lbound estimate ubound
twoACEvj.US[1,10] -1.0535 -0.1931 0.6014
twoACEvj.US[1,13] -0.2559 0.3224 0.8897
twoACEvj.US[1,16] 0.5613 0.8707 1.2305
twoACEvj.US[2,10] NA -3.2858 NA
twoACEvj.US[2,13] NA 0.4030 NA
twoACEvj.US[2,16] NA 3.8829 NA
twoACEvj.US[2,11] -0.0382 0.3953 0.8303
twoACEvj.US[2,14] -0.0805 0.3184 0.6339
twoACEvj.US[2,17] 0.1786 0.2863 0.4704
Also, I want to report it like as in the image [Ace.h2 [1,1]....], so I tried the script I posted as the first comment. How can I do this with the script you shared?
Thanks!
Log in or register to post comments
In reply to Thanks! I tried the script by nsc_95
verbose=TRUE
OpenMx is reporting some of those confidence limits as `NA` because it thinks they might be wrong. You can see the masked confidence limits, and why OpenMx thinks they might be wrong, if you pass argument `verbose=TRUE` to `summary()`, e.g. `summary(myFittedModel,verbose=TRUE)`. For a 95% confidence interval, the fit at a correct confidence limit should be 3.841 higher than the fit at the maximum-likelihood solution; with that in mind, you can judge *how* wrong some of the masked confidence limits are.
Sorry, could you rephrase the question?
Log in or register to post comments
Also, I want to report it
I don't understand why the confidence intervals are not reported sequentially across the 3 variables. I think it stems from this part:
# Create Confidence Interval Objects
odd <- seq(1+3*nv,2*3*nv,nv)
even <- seq(2+3*nv,2*3*nv,nv)
ciACE <- mxCI( c("US[1,odd]","US[2,odd]","US[2,even]") )
And it is reported as:
twoACEvj.US[1,10]
twoACEvj.US[1,13]
twoACEvj.US[1,16]
twoACEvj.US[2,10]
twoACEvj.US[2,13]
twoACEvj.US[2,16]
twoACEvj.US[2,11]
twoACEvj.US[2,14]
twoACEvj.US[2,17]
It confused me to interpret this. Instead I want to report one by one such as twoACEvj.US[1,1], twoACEvj.US[1,2]....Also, what can I do to examine the confidence intervals of h2, c2,e2,Rph,Ra,Re.
Thank you for your answers and your time!
Log in or register to post comments
In reply to Also, I want to report it by nsc_95
WYSIWID
Some advice: one of the guiding principles in OpenMx's design is WYSIWID--"What You Say Is What It Does". If you want confidence intervals for every element of the MxAlgebra named "US", then just reference it in your call to
mxCI()
, e.g.ciACE <- mxCI("US")
. Of course, if you want confidence intervals for other matrices or algebras, you'll need to include them in the reference as well, e.g.,ciACE <- mxCI(c("US","h2","c2","e2","Rph","Ra","re"))
. However, you first need to put MxAlgebras or MxMatrices with those additional names into your MxModel (which your current script does not do).
Log in or register to post comments
In addition to my previous
library(OpenMx)
library(psych); library(polycor)
source("miFunctions.R")
# Select Continuous Variables
varsc <- c('age','bmi') # list of continuous variables names
nvc <- 2 # number of continuous variables
ntvc <- nvc*2 # number of total continuous variables
conVars <- paste(varsc,c(rep(1,nvc),rep(2,nvc)),sep="")
# Select Ordinal Variables
nth <- 1 # number of thresholds
varso <- c('cannabis_dummy') # list of ordinal variables names
nvo <- 1 # number of ordinal variables
ntvo <- nvo*2 # number of total ordinal variables
ordVars <- paste(varso,c(rep(1,nvo),rep(2,nvo)),sep="")
ordData <- TwinData
# Select Variables for Analysis
vars <- c('age','bmi','cannabis_dummy') # list of variables names
nv <- nvc+nvo # number of variables
ntv <- nv*2 # number of total variables
oc <- c(0,0,1) # 0 for continuous, 1 for ordinal variables
selVars <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="")
# Select Data for Analysis
mzData <- subset(ordData, zygosity=="MZ", selVars)
dzData <- subset(ordData, zygosity=="DZ", selVars)
mzDataF <- cbind(mzData[,conVars],mxFactor( x=mzData[,ordVars], levels=c(0:nth)) )
dzDataF <- cbind(dzData[,conVars],mxFactor( x=dzData[,ordVars], levels=c(0:nth)) )
mzcov <- cov(mzData,use="complete")
dzcov <- cov(dzData,use="complete")
describe(mzData)
# Set Starting Values
frMV <- c(TRUE,TRUE,FALSE) # free status for variables
svMe <- c(0,0,0) # start value for means
svPa <- .5 # start value for path coefficient
svPe <- .5 # start value for path coefficient for e
svLTh <- -.5 # start value for first threshold
svITh <- 1 # start value for increments
svTh <- matrix(rep(c(svLTh,(rep(svITh,nth-1)))),nrow=nth,ncol=nvo) # start value for thresholds
lbTh <- matrix(rep(c(-3,(rep(0.001,nth-1))),nv),nrow=nth,ncol=nvo) # lower bounds for thresholds
# ----------------------------------------------------------------------------------------------------------------------
# PREPARE MODEL
# Create Algebra for expected Mean & Threshold Matrices
meanG <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=frMV, values=svMe, labels=labVars("mean",vars), name="meanG" )
thinG <- mxMatrix( type="Full", nrow=nth, ncol=ntvo, free=TRUE, values=svTh, lbound=lbTh, labels=labTh("th",varso,nth), name="thinG" )
inc <- mxMatrix( type="Lower", nrow=nth, ncol=nth, free=FALSE, values=1, name="inc" )
threG <- mxAlgebra( expression= inc %*% thinG, name="threG" )
# Create Matrices for Variance Components
covA <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), label=labLower("VA",nv), name="VA" )
covC <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), label=labLower("VC",nv), name="VC" )
covE <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), label=labLower("VE",nv), name="VE" )
# Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins
covP <- mxAlgebra( expression= VA+VC+VE, name="V" )
covMZ <- mxAlgebra( expression= VA+VC, name="cMZ" )
covDZ <- mxAlgebra( expression= 0.5%x%VA+ VC, name="cDZ" )
expCovMZ <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" )
expCovDZ <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" )
# Constrain Variance of Binary Variables
matUnv <- mxMatrix( type="Unit", nrow=nvo, ncol=1, name="Unv1" )
matOc <- mxMatrix( type="Full", nrow=1, ncol=nv, free=FALSE, values=oc, name="Oc" )
var1 <- mxConstraint( expression=diag2vec(Oc%&%V)==Unv1, name="Var1" )
# Create Data Objects for Multiple Groups
dataMZ <- mxData( observed=mzDataF, type="raw" )
dataDZ <- mxData( observed=dzDataF, type="raw" )
# Create Expectation Objects for Multiple Groups
expMZ <- mxExpectationNormal( covariance="expCovMZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )
expDZ <- mxExpectationNormal( covariance="expCovDZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )
funML <- mxFitFunctionML()
# Create Model Objects for Multiple Groups
pars <- list(meanG, thinG, inc, threG, matUnv, matOc, covA, covC, covE, covP )
modelMZ <- mxModel( pars, covMZ, expCovMZ, dataMZ, expMZ, funML, name="MZ" )
modelDZ <- mxModel( pars, covDZ, expCovDZ, dataDZ, expDZ, funML, name="DZ" )
multi <- mxFitFunctionMultigroup( c("MZ","DZ") )
# Create Algebra for Standardization
matI <- mxMatrix( type="Iden", nrow=nv, ncol=nv, name="I" )
invSD <- mxAlgebra( expression=solve(sqrt(I*V)), name="iSD" )
# Calculate genetic and environmental correlations
corA <- mxAlgebra( expression=solve(sqrt(I*VA))%&%VA, name ="rA" ) #cov2cor()
corC <- mxAlgebra( expression=solve(sqrt(I*VC))%&%VC, name ="rC" )
corE <- mxAlgebra( expression=solve(sqrt(I*VE))%&%VE, name ="rE" )
# Create Algebra for Unstandardized and Standardized Variance Components
rowUS <- rep('US',nv)
colUS <- rep(c('VA','VC','VE','SA','SC','SE'),each=nv)
estUS <- mxAlgebra( expression=cbind(VA,VC,VE,VA/V,VC/V,VE/V), name="US", dimnames=list(rowUS,colUS) )
# Create Confidence Interval Objects
odd <- seq(1+3*nv,2*3*nv,nv)
even <- seq(2+3*nv,2*3*nv,nv)
ciACE <- mxCI( c("US[1,odd]","US[2,odd]","US[2,even]") )
# Build Model with Confidence Intervals
calc <- list( matI, invSD, corA, corC, corE, estUS, ciACE )
modelACE <- mxModel( "twoACEvj", pars, var1, modelMZ, modelDZ, multi, calc )
# ----------------------------------------------------------------------------------------------------------------------
# RUN MODEL
# Run ACE Model
fitACE <- mxTryHard( modelACE, intervals=T )
summary( fitACE ,verbose=T)
# Compare with Saturated Model
#mxCompare( fitSAT, fitACE )
# Print Goodness-of-fit Statistics & Parameter Estimates
fitGofs(fitACE)
fitEstCis(fitACE)
fitACE$algebras
# ----------------------------------------------------------------------------------------------------------------------
# RUN SUBMODELS
# Run AE model
modelAE <- mxModel( fitACE, name="twoAEvj" )
modelAE <- omxSetParameters( modelAE, labels=labLower("VC",nv), free=FALSE, values=0 )
modelAE <- omxSetParameters( modelAE, labels=labLower("VA",nv), free=TRUE, values=.6 )
fitAE <- mxTryHard( modelAE, intervals=T )
fitGofs(fitAE); fitEstCis(fitAE)
# Run CE model
modelCE <- mxModel( fitACE, name="twoCEvj" )
modelCE <- omxSetParameters( modelCE, labels=labLower("VA",nv), free=FALSE, values=0 )
modelCE <- omxSetParameters( modelCE, labels=labLower("VC",nv), free=TRUE, values=.6 )
fitCE <- mxTryHard( modelCE, intervals=T )
fitGofs(fitCE); fitEstCis(fitCE)
# Run E model
modelE <- mxModel( fitAE, name="twoEvj" )
modelE <- omxSetParameters( modelE, labels=labLower("VA",nv), free=FALSE, values=0 )
fitE <- mxTryHard( modelAE, intervals=T )
fitGofs(fitE); fitEstCis(fitE)
# Print Comparative Fit Statistics
mxCompare( fitACE, nested <- list(fitAE, fitCE, fitE) )
round(rbind(fitACE$US$result,fitAE$US$result,fitCE$US$result,fitE$US$result),4)
fitACE$algebras
# Correlations
cov2cor(mxGetExpected(fitACE$MZ, "covariance"))
cov2cor(mxGetExpected(fitACE$DZ, "covariance"))
Log in or register to post comments