You are here

Trivariate Cholesky decomposition model

9 posts / 0 new
Last post
nsc_95's picture
Offline
Joined: 07/28/2023 - 11:29
Trivariate Cholesky decomposition model

Hi!

I have a script from SGDP summer school, I can analyze using 2 continuous variables with this script. However, when I try using 3 variables (2 continuous 1 dummy coded), I get stuck while setting up the matrix. Before starting the analysis, I standardized my variables so that the SDs were 1 and their mean was 0.

Part of my code is as follows:

nv   <- 3      # number of variables for a twin 
ntv <- 2*nv     # number of variables for a pair 
 
Vars    <- c('bmi', 'age','dummy_x')
selVars <- c('bmi1','age1', 'dummy_x1', 'bmi2','age2', 'dummy_x2')
 
# Select Data for Analysis
mzData <- subset(TwinData, zygosity=='MZ', selVars)
dzData <- subset(TwinData, zygosity=='DZ', selVars)
 
# Descriptive Statistics of data by zygosity group
describe(mzData)
describe(dzData)
colMeans(mzData,na.rm=TRUE)
cov(mzData,use="complete")
cor(mzData,use="complete")
colMeans(dzData,na.rm=TRUE)
cov(dzData,use="complete")
cor(dzData,use="complete")
 
# -----------------------------------------------------------------------------------------------------------------------------------
# Now we estimate the MZ and DZ covariances and Means by fitting a 'model' to the data
# 1a) Specify Bivariate Saturated Model (Cholesky Decomposition)
# -----------------------------------------------------------------------------------------------------------------------------------
 
# Matrix & Algebra for expected means and covariances 
MZlow       <-mxMatrix( type="Lower", nrow=ntv, ncol=ntv, free=T, values=5, name="LowMZ" )
CovMZ       <-mxAlgebra( expression=LowMZ %*% t(LowMZ), name="ExpCovMZ" )
MeanMZ  <-mxMatrix( type="Full", nrow=1, ncol=ntv, free=T, values=c(0,0,0,0,0,0), name="ExpMeanMZ" )
 
DZlow       <-mxMatrix( type="Lower", nrow=ntv, ncol=ntv, free=T, values=5, name="LowDZ" )
CovDZ       <-mxAlgebra( expression=LowDZ %*% t(LowDZ), name="ExpCovDZ" )
MeanDZ  <-mxMatrix( type="Full", nrow=1, ncol=ntv, free=T, values=c(0,0,0,0,0,0), name="ExpMeanDZ" )
 
# Data objects for Multiple Groups
dataMZ   <- mxData( observed=mzData, type="raw" )
dataDZ   <- mxData( observed=dzData, type="raw" )
 
# Objective objects for Multiple Groups
objMZ    <- mxExpectationNormal( covariance="ExpCovMZ", means="ExpMeanMZ", dimnames=selVars)
objDZ    <- mxExpectationNormal( covariance="ExpCovDZ", means="ExpMeanDZ", dimnames=selVars)
 
fitFunction <- mxFitFunctionML()
 
# Combine Groups
modelMZ <- mxModel( MZlow, CovMZ, MeanMZ, dataMZ, objMZ, fitFunction, name="MZ")
modelDZ <- mxModel( DZlow, CovDZ, MeanDZ, dataDZ, objDZ, fitFunction, name="DZ")
minus2ll    <- mxAlgebra( expression=MZ.objective + DZ.objective, name="m2LL" )
obj     <- mxFitFunctionAlgebra( "m2LL" )
SatModel    <- mxModel( "Sat", modelMZ, modelDZ, minus2ll, obj)
 
# -------------------------------------------------------------------------------------------------------------------------------
# 1a) RUN Saturated Model (Cholesky Decomposition)
 
SatFit      <- mxRun(SatModel)
(SatSum       <- summary(SatFit))
# Generate SatModel Output
round(SatFit@output$estimate,4)
 
mxEval(MZ.ExpMeanMZ, SatFit)
mxEval(MZ.ExpCovMZ, SatFit)
mxEval(DZ.ExpMeanDZ, SatFit)
mxEval(DZ.ExpCovDZ, SatFit)
 
# -----------------------------------------------------------------------------------------------------------------------------------
# 1b) Specify Bivariate Saturated Model (Gaussian Decomposition) 
#     We will use this specification to fit a constrained model
# -----------------------------------------------------------------------------------------------------------------------------------
svM <-c(0,0,0,0,0,0)             # these are start values
svSD    <-c(1,1,1,1,1,1)
svMZ<-c(-.04,.2,-.4,-.1,.2,-.4)
svDZ    <-c(-.03,-.1,-.1,-.1,.5,-.3)
 
# Matrix & Algebra for expected means and covariances 
MeanMZ  <-mxMatrix( "Full", 1, ntv, free=T, values=svM, labels=c("MZm11", "MZm21","MZm31","MZm12", "MZm22",'Mzm32'), name="ExpMeanMZ" )
MZsd        <-mxMatrix( "Diag", ntv, ntv, free=T, values=svSD,labels=c("MZs11", "MZs21","MZs31","MZs12", "MZs22",'Mzs32'), name="sdMZ" )
Cormz       <-mxMatrix( "Stand", ntv, ntv, free=T, values=svMZ, labels=c("rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2"),name="MZCor") 
CovMZ       <-mxAlgebra( expression=sdMZ %*% MZCor %*% t(sdMZ), name="ExpCovMZ" )
 
MeanDZ  <-mxMatrix( "Full", 1, ntv, free=T, values=svM, labels=c("DZm11", "DZm21", "DZm12", "DZm22"), name="ExpMeanDZ" )
DZsd        <-mxMatrix( "Diag", ntv, ntv, free=T, values=svSD, labels=c("DZs11", "DZs21", "DZs12", "DZs22"), name="sdDZ" )
Cordz       <-mxMatrix( "Stand", ntv, ntv, free=T, values=svDZ, labels=c("rPhDZ1","rDZ1","DZxtxt1","DZxtxt2","rDZ2","rPhDZ2"),name="DZCor") 
CovDZ       <-mxAlgebra( expression=sdDZ %*% DZCor %*% t(sdDZ), name="ExpCovDZ" )

First thing I don't understand is how do we determine the svMZ and svDZ values? How many values ​​do we need to enter? Originally there were 6 for 2 variables, in this case do I have to enter 9 values ​​for 3 variables?
Secondly, how do we write the elements of the matrix in the corMZ and corDZ part?

Thanks!

AdminRobK's picture
Offline
Joined: 01/24/2014 - 12:15
First of all, I notice that

First of all, I notice that you're treating your dummy-coded variable as though it's continuous, and not as a "threshold trait". Are you sure that's what you want to do?

Before starting the analysis, I standardized my variables so that the SDs were 1 and their mean was 0.

That's not necessary, and I'm not sure it's even a good idea.

First thing I don't understand is how do we determine the svMZ and svDZ values?

They're start values, so they should just be reasonable guesses at what the corresponding statistics are in your datasets. For instance, you could use the output of cor(mzData,use="complete") and cor(dzData,use="complete") to guide your choice of start values. Remember that, by default, R puts values into matrices in column-major order.

How many values ​​do we need to enter? Originally there were 6 for 2 variables, in this case do I have to enter 9 values ​​for 3 variables?

No. The number of unique elements in a kxk correlation matrix is k(k-1)/2. So, for a 6x6 correlation matrix (for 3 traits and 2 twins), you would need 6(5)/2 = 15 values.

Secondly, how do we write the elements of the matrix in the corMZ and corDZ part?

Sorry, I don't understand this question...?

Finally, consider replacing these two lines,

minus2ll    <- mxAlgebra( expression=MZ.objective + DZ.objective, name="m2LL" )
obj     <- mxFitFunctionAlgebra( "m2LL" )

, with some syntax that uses mxFitFunctionMultigroup.

nsc_95's picture
Offline
Joined: 07/28/2023 - 11:29
Hi Rob!

Hi Rob!
Thank you for your answers. Well, how can I add it (dummy-coded variable) with the threshold trait. Frankly, I couldn't find how to include a dummy coded variable in the analysis.

Secondly, how do we write the elements of the matrix in the corMZ and corDZ part?

What I meant by this question was how I should write the corDZ and corMZ matrices using the svMZ and svDZ values. I wrote 11 21 31 12 22 32 (for 3 traits and 2 twins) in the MeanMZ line. But I am confused how to write this in CorMZ line. Should it be written as "rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2","rPhMZ3","rMZ3","MZxtxt3".

MeanMZ  <-mxMatrix( "Full", 1, ntv, free=T, values=svM, labels=c("MZm11", "MZm21","MZm31","MZm12", "MZm22",'Mzm32'), name="ExpMeanMZ" )
MZsd        <-mxMatrix( "Diag", ntv, ntv, free=T, values=svSD,labels=c("MZs11", "MZs21","MZs31","MZs12", "MZs22",'Mzs32'), name="sdMZ" )
Cormz       <-mxMatrix( "Stand", ntv, ntv, free=T, values=svMZ, labels=c("rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2"),name="MZCor") 
CovMZ       <-mxAlgebra( expression=sdMZ %*% MZCor %*% t(sdMZ), name="ExpCovMZ" )

Thank you for your time and interest!

AdminRobK's picture
Offline
Joined: 01/24/2014 - 12:15
some advice
What I meant by this question was how I should write the corDZ and corMZ matrices using the svMZ and svDZ values. I wrote 11 21 31 12 22 32 (for 3 traits and 2 twins) in the MeanMZ line. But I am confused how to write this in CorMZ line. Should it be written as "rPhMZ1","rMZ1","MZxtxt1","MZxtxt2","rMZ2","rPhMZ2","rPhMZ3","rMZ3","MZxtxt3".

Again, the two correlation matrices are 6x6, so they have (at most) 15 unique elements. However, due to the twin-related, intraclass structure of your data, there are only nine unique elements in each, since some elements are constrained equal to each other. You need to define the MZ correlation matrix as something like this,

Cormz <-mxMatrix(
    "Stand", ntv, ntv, free=T, values=0.1, dimnames=list(selVars,selVars),
    labels=c("rPhMZ12","rPhMZ13","rMZ1","MZxtxt12","MZxtxt13",
                     "rPhMZ23","MZxtxt12","rMZ2","MZxtxt23",
                     "MZxtxt13","MZxtxt23","rMZ3",
                     "rPhMZ12","rPhMZ13",
                     "rPhMZ23"),
name="MZCor")

, and correspondingly for the DZ correlation matrix. I didn't write the script you're modifying, so I don't know for sure, but I can guess at what the labels are supposed to mean. "rMZ" probably means "MZ correlation for a given phenotype", "rPhMZ" probably means "correlation between two phenotypes as estimated in MZ twins", and "xtxt" almost certainly means "cross-twin, cross-trait".

Well, how can I add it (dummy-coded variable) with the threshold trait. Frankly, I couldn't find how to include a dummy coded variable in the analysis.

This script provides an example of a joint analysis of one continuous and one threshold trait.

nsc_95's picture
Offline
Joined: 07/28/2023 - 11:29
Thanks! I tried the script

Thanks! I tried the script you mentioned in the comment, and was able to get all the parameters. However, when looking at the confidence interval, some values ​​look NA. How can I eliminate this?
lbound estimate ubound
twoACEvj.US[1,10] -1.0535 -0.1931 0.6014
twoACEvj.US[1,13] -0.2559 0.3224 0.8897
twoACEvj.US[1,16] 0.5613 0.8707 1.2305
twoACEvj.US[2,10] NA -3.2858 NA
twoACEvj.US[2,13] NA 0.4030 NA
twoACEvj.US[2,16] NA 3.8829 NA
twoACEvj.US[2,11] -0.0382 0.3953 0.8303
twoACEvj.US[2,14] -0.0805 0.3184 0.6339
twoACEvj.US[2,17] 0.1786 0.2863 0.4704

Also, I want to report it like as in the image [Ace.h2 [1,1]....], so I tried the script I posted as the first comment. How can I do this with the script you shared?

Thanks!

AdminRobK's picture
Offline
Joined: 01/24/2014 - 12:15
verbose=TRUE
Thanks! I tried the script you mentioned in the comment, and was able to get all the parameters. However, when looking at the confidence interval, some values ​​look NA. How can I eliminate this?

OpenMx is reporting some of those confidence limits as NA because it thinks they might be wrong. You can see the masked confidence limits, and why OpenMx thinks they might be wrong, if you pass argument verbose=TRUE to summary(), e.g. summary(myFittedModel,verbose=TRUE). For a 95% confidence interval, the fit at a correct confidence limit should be 3.841 higher than the fit at the maximum-likelihood solution; with that in mind, you can judge how wrong some of the masked confidence limits are.

Also, I want to report it like as in the image [Ace.h2 [1,1]....], so I tried the script I posted as the first comment. How can I do this with the script you shared?

Sorry, could you rephrase the question?

nsc_95's picture
Offline
Joined: 07/28/2023 - 11:29
Also, I want to report it

Also, I want to report it like as in the image [Ace.h2 [1,1]....], so I tried the script I posted as the first comment. How can I do this with the script you shared?

I don't understand why the confidence intervals are not reported sequentially across the 3 variables. I think it stems from this part:

# Create Confidence Interval Objects
odd       <- seq(1+3*nv,2*3*nv,nv)
even      <- seq(2+3*nv,2*3*nv,nv)
ciACE     <- mxCI( c("US[1,odd]","US[2,odd]","US[2,even]") )

And it is reported as:

twoACEvj.US[1,10] 
twoACEvj.US[1,13] 
twoACEvj.US[1,16]  
twoACEvj.US[2,10]      
twoACEvj.US[2,13]      
twoACEvj.US[2,16]      
twoACEvj.US[2,11] 
twoACEvj.US[2,14] 
twoACEvj.US[2,17]  

It confused me to interpret this. Instead I want to report one by one such as twoACEvj.US[1,1], twoACEvj.US[1,2]....Also, what can I do to examine the confidence intervals of h2, c2,e2,Rph,Ra,Re.

Thank you for your answers and your time!

AdminRobK's picture
Offline
Joined: 01/24/2014 - 12:15
WYSIWID
It confused me to interpret this. Instead I want to report one by one such as twoACEvj.US[1,1], twoACEvj.US[1,2]....Also, what can I do to examine the confidence intervals of h2, c2,e2,Rph,Ra,Re.

Some advice: one of the guiding principles in OpenMx's design is WYSIWID--"What You Say Is What It Does". If you want confidence intervals for every element of the MxAlgebra named "US", then just reference it in your call to mxCI(), e.g. ciACE <- mxCI("US"). Of course, if you want confidence intervals for other matrices or algebras, you'll need to include them in the reference as well, e.g.,

ciACE <- mxCI(c("US","h2","c2","e2","Rph","Ra","re"))

. However, you first need to put MxAlgebras or MxMatrices with those additional names into your MxModel (which your current script does not do).

nsc_95's picture
Offline
Joined: 07/28/2023 - 11:29
In addition to my previous

In addition to my previous post, I have also attached my script:

library(OpenMx)
library(psych); library(polycor)
source("miFunctions.R")
 
# Select Continuous Variables
varsc     <- c('age','bmi')                   # list of continuous variables names
nvc       <- 2                        # number of continuous variables
ntvc      <- nvc*2                     # number of total continuous variables
conVars   <- paste(varsc,c(rep(1,nvc),rep(2,nvc)),sep="")
 
# Select Ordinal Variables
nth       <- 1                       # number of thresholds
varso     <- c('cannabis_dummy')                   # list of ordinal variables names
nvo       <- 1                         # number of ordinal variables
ntvo      <- nvo*2                     # number of total ordinal variables
ordVars   <- paste(varso,c(rep(1,nvo),rep(2,nvo)),sep="")
ordData   <- TwinData
 
# Select Variables for Analysis
vars      <- c('age','bmi','cannabis_dummy')              # list of variables names
nv        <- nvc+nvo                   # number of variables
ntv       <- nv*2                      # number of total variables
oc        <- c(0,0,1)                    # 0 for continuous, 1 for ordinal variables
selVars   <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="")
 
 
# Select Data for Analysis
mzData    <- subset(ordData, zygosity=="MZ", selVars)
dzData    <- subset(ordData, zygosity=="DZ", selVars)
mzDataF   <- cbind(mzData[,conVars],mxFactor( x=mzData[,ordVars], levels=c(0:nth)) )
dzDataF   <- cbind(dzData[,conVars],mxFactor( x=dzData[,ordVars], levels=c(0:nth)) )
 
 
mzcov <- cov(mzData,use="complete")
dzcov <- cov(dzData,use="complete")
 
describe(mzData)
# Set Starting Values
frMV      <- c(TRUE,TRUE,FALSE)             # free status for variables
svMe      <- c(0,0,0)                   # start value for means
svPa      <- .5                      # start value for path coefficient
svPe      <- .5                     # start value for path coefficient for e
svLTh     <- -.5                       # start value for first threshold
svITh     <- 1                         # start value for increments
svTh      <- matrix(rep(c(svLTh,(rep(svITh,nth-1)))),nrow=nth,ncol=nvo)     # start value for thresholds
lbTh      <- matrix(rep(c(-3,(rep(0.001,nth-1))),nv),nrow=nth,ncol=nvo)     # lower bounds for thresholds 
# ----------------------------------------------------------------------------------------------------------------------
# PREPARE MODEL
 
# Create Algebra for expected Mean & Threshold Matrices
meanG     <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=frMV, values=svMe, labels=labVars("mean",vars), name="meanG" )
thinG     <- mxMatrix( type="Full", nrow=nth, ncol=ntvo, free=TRUE, values=svTh, lbound=lbTh, labels=labTh("th",varso,nth), name="thinG" )
inc       <- mxMatrix( type="Lower", nrow=nth, ncol=nth, free=FALSE, values=1, name="inc" )
threG     <- mxAlgebra( expression= inc %*% thinG, name="threG" )
 
# Create Matrices for Variance Components
covA      <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), label=labLower("VA",nv), name="VA" ) 
covC      <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), label=labLower("VC",nv), name="VC" )
covE      <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, values=valDiag(svPa,nv), label=labLower("VE",nv), name="VE" )
 
# Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins
covP      <- mxAlgebra( expression= VA+VC+VE, name="V" )
covMZ     <- mxAlgebra( expression= VA+VC, name="cMZ" )
covDZ     <- mxAlgebra( expression= 0.5%x%VA+ VC, name="cDZ" )
expCovMZ  <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" )
expCovDZ  <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" )
 
 
# Constrain Variance of Binary Variables
matUnv    <- mxMatrix( type="Unit", nrow=nvo, ncol=1, name="Unv1" )
matOc     <- mxMatrix( type="Full", nrow=1, ncol=nv, free=FALSE, values=oc, name="Oc" )
var1      <- mxConstraint( expression=diag2vec(Oc%&%V)==Unv1, name="Var1" )
 
# Create Data Objects for Multiple Groups
dataMZ    <- mxData( observed=mzDataF, type="raw" )
dataDZ    <- mxData( observed=dzDataF, type="raw" )
 
# Create Expectation Objects for Multiple Groups
expMZ     <- mxExpectationNormal( covariance="expCovMZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )
expDZ     <- mxExpectationNormal( covariance="expCovDZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )
funML     <- mxFitFunctionML()
 
# Create Model Objects for Multiple Groups
pars      <- list(meanG, thinG, inc, threG, matUnv, matOc, covA, covC, covE, covP )
modelMZ   <- mxModel( pars, covMZ, expCovMZ, dataMZ, expMZ, funML, name="MZ" )
modelDZ   <- mxModel( pars, covDZ, expCovDZ, dataDZ, expDZ, funML, name="DZ" )
multi     <- mxFitFunctionMultigroup( c("MZ","DZ") )
 
# Create Algebra for Standardization
matI      <- mxMatrix( type="Iden", nrow=nv, ncol=nv, name="I" )
invSD     <- mxAlgebra( expression=solve(sqrt(I*V)), name="iSD" )
 
# Calculate genetic and environmental correlations
corA      <- mxAlgebra( expression=solve(sqrt(I*VA))%&%VA, name ="rA" ) #cov2cor()
corC      <- mxAlgebra( expression=solve(sqrt(I*VC))%&%VC, name ="rC" )
corE      <- mxAlgebra( expression=solve(sqrt(I*VE))%&%VE, name ="rE" )
 
 
# Create Algebra for Unstandardized and Standardized Variance Components
rowUS     <- rep('US',nv)
colUS     <- rep(c('VA','VC','VE','SA','SC','SE'),each=nv)
estUS     <- mxAlgebra( expression=cbind(VA,VC,VE,VA/V,VC/V,VE/V), name="US", dimnames=list(rowUS,colUS) )
 
# Create Confidence Interval Objects
odd       <- seq(1+3*nv,2*3*nv,nv)
even      <- seq(2+3*nv,2*3*nv,nv)
ciACE     <- mxCI( c("US[1,odd]","US[2,odd]","US[2,even]") )
 
# Build Model with Confidence Intervals
calc      <- list( matI, invSD, corA, corC, corE, estUS, ciACE )
modelACE  <- mxModel( "twoACEvj", pars, var1, modelMZ, modelDZ, multi, calc )
 
# ----------------------------------------------------------------------------------------------------------------------
# RUN MODEL
 
# Run ACE Model
fitACE    <- mxTryHard( modelACE, intervals=T )
summary( fitACE ,verbose=T)
 
# Compare with Saturated Model
#mxCompare( fitSAT, fitACE )
 
# Print Goodness-of-fit Statistics & Parameter Estimates
fitGofs(fitACE)
fitEstCis(fitACE)
fitACE$algebras
# ----------------------------------------------------------------------------------------------------------------------
# RUN SUBMODELS
 
# Run AE model
modelAE   <- mxModel( fitACE, name="twoAEvj" )
modelAE   <- omxSetParameters( modelAE, labels=labLower("VC",nv), free=FALSE, values=0 )
modelAE   <- omxSetParameters( modelAE, labels=labLower("VA",nv), free=TRUE, values=.6 )
fitAE     <- mxTryHard( modelAE, intervals=T )
 
fitGofs(fitAE); fitEstCis(fitAE)
 
# Run CE model
modelCE   <- mxModel( fitACE, name="twoCEvj" )
modelCE   <- omxSetParameters( modelCE, labels=labLower("VA",nv), free=FALSE, values=0 )
modelCE   <- omxSetParameters( modelCE, labels=labLower("VC",nv), free=TRUE, values=.6 )
fitCE     <- mxTryHard( modelCE, intervals=T )
fitGofs(fitCE); fitEstCis(fitCE)
 
# Run E model
modelE    <- mxModel( fitAE, name="twoEvj" )
modelE    <- omxSetParameters( modelE, labels=labLower("VA",nv), free=FALSE, values=0 )
fitE      <- mxTryHard( modelAE, intervals=T )
fitGofs(fitE); fitEstCis(fitE)
 
# Print Comparative Fit Statistics
mxCompare( fitACE, nested <- list(fitAE, fitCE, fitE) )
round(rbind(fitACE$US$result,fitAE$US$result,fitCE$US$result,fitE$US$result),4)
 
fitACE$algebras
# Correlations
cov2cor(mxGetExpected(fitACE$MZ, "covariance"))
cov2cor(mxGetExpected(fitACE$DZ, "covariance"))