# Bivariate model with binary and continuous variable

2 posts / 0 new
Offline
Joined: 01/31/2017 - 11:34
Bivariate model with binary and continuous variable

Hi,
I am new to bivariate models and I have tried to adapt the twoACEj.R script by Hermine Maes from an ordinal to a binary variable in the below script. The script runs without errors, but my first question is whether my adaptations are correct?

# Select Binary Variables
nth       <- 1                         # number of thresholds
varso     <- c('ht')                   # list of binary variables names
nvo       <- 1                         # number of binary variables
ntvo      <- nvo*2                     # number of total binary variables
ordVars   <- paste(varso,c(rep(1,nvo),rep(2,nvo)),sep="")

# Select Continuous Variables
varsc     <- c('bmi')                   # list of continuous variables names
nvc       <- 1                         # number of continuous variables
ntvc      <- nvc*2                     # number of total continuous variables
conVars   <- paste(varsc,c(rep(1,nvc),rep(2,nvc)),sep="")

# Select Variables for Analysis
vars      <- c('ht','bmi')              # list of variables names
nv        <- nvo+nvc                   # number of variables
ntv       <- nv*2                      # number of total variables
oc        <- c(1,0)                    # 0 for continuous, 1 for ordinal variables
selVars   <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="")

# Select Data for Analysis
mzData    <- subset(twinData, zyg==1, selVars)
dzData    <- subset(twinData, zyg==2, selVars)
mzDataF   <- cbind(mxFactor( x=mzData[,ordVars], levels=c(0:nth)), mzData[,conVars])
dzDataF   <- cbind(mxFactor( x=dzData[,ordVars], levels=c(0:nth)), dzData[,conVars])

# Generate Descriptive Statistics
apply(mzData,2,myMean)
apply(dzData,2,myMean)
#?apply(mzData,2,myCor)
#?apply(dzData,2,myCor)

# Set Starting Values
frMV      <- c(FALSE, TRUE)             # free status for variables
frCvD     <- diag(frMV,ntv,ntv)        # lower bounds for diagonal of covariance matrix
frCvD[lower.tri(frCvD)] <- TRUE        # lower bounds for below diagonal elements
frCvD[upper.tri(frCvD)] <- TRUE        # lower bounds for above diagonal elements
frCv      <- matrix(as.logical(frCvD),4)
svMe      <- c(0,22)                   # start value for means
svPa      <- .4                        # start value for path coefficient
svPaD     <- vech(diag(svPa,nv,nv))    # start values for diagonal of covariance matrix
svPe      <- .8                        # start value for path coefficient for e
svPeD     <- vech(diag(svPe,nv,nv))    # start values for diagonal of covariance matrix
lbPa      <- .0001                     # start value for lower bounds
lbPaD     <- diag(lbPa,nv,nv)          # lower bounds for diagonal of covariance matrix
lbPaD[lower.tri(lbPaD)] <- -10         # lower bounds for below diagonal elements
lbPaD[upper.tri(lbPaD)] <- NA          # lower bounds for above diagonal elements
svLTh     <- -1.5                      # start value for first threshold
svITh     <- 1                         # start value for increments
svTh      <- matrix(rep(c(svLTh,(rep(svITh,nth-1)))),nrow=nth,ncol=nv)     # start value for thresholds
lbTh      <- matrix(rep(c(-3,(rep(0.001,nth-1))),nv),nrow=nth,ncol=nv)     # lower bounds for thresholds

# Create Labels
labMe     <- paste("mean", c('hs','bmi'),sep="_")
labTh     <- paste(paste("t",1:nth,sep=""),rep(varso,each=nth),sep="_")

# ------------------------------------------------------------------------------
# PREPARE MODEL

# ACE Model
# Create Algebra for expected Mean Matrices
meanG     <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=frMV, values=svMe, labels=labMe, name="meanG" )
thinG     <- mxMatrix( type="Full", nrow=nth, ncol=ntvo, free=TRUE, values=svTh, lbound=lbTh, labels=labTh, name="thinG" )
inc       <- mxMatrix( type="Lower", nrow=nth, ncol=nth, free=FALSE, values=1, name="inc" )
threG     <- mxAlgebra( expression= inc %*% thinG, name="threG" )

# Create Matrices for Path Coefficients
pathA     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("a",nv), lbound=lbPaD, name="a" )
pathC     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("c",nv), lbound=lbPaD, name="c" )
pathE     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPeD, label=labLower("e",nv), lbound=lbPaD, name="e" )

# Create Algebra for Variance Comptwonts
covA      <- mxAlgebra( expression=a %*% t(a), name="A" )
covC      <- mxAlgebra( expression=c %*% t(c), name="C" )
covE      <- mxAlgebra( expression=e %*% t(e), name="E" )

# Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins
covP      <- mxAlgebra( expression= A+C+E, name="V" )
covMZ     <- mxAlgebra( expression= A+C, name="cMZ" )
covDZ     <- mxAlgebra( expression= 0.5%x%A+ C, name="cDZ" )
expCovMZ  <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" )
expCovDZ  <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" )

# Create Algebra for Standardization
matI      <- mxMatrix( type="Iden", nrow=nv, ncol=nv, name="I")
invSD     <- mxAlgebra( expression=solve(sqrt(I*V)), name="iSD")

# Calculate genetic and environmental correlations
corA      <- mxAlgebra( expression=solve(sqrt(I*A))%&%A, name ="rA" ) #cov2cor()
corC      <- mxAlgebra( expression=solve(sqrt(I*C))%&%C, name ="rC" )
corE      <- mxAlgebra( expression=solve(sqrt(I*E))%&%E, name ="rE" )

# Constrain Variance of Binary Variables
matUnv    <- mxMatrix( type="Unit", nrow=nvo, ncol=1, name="Unv1" )
matOc     <- mxMatrix( type="Full", nrow=1, ncol=nv, free=FALSE, values=oc, name="Oc" )
var1      <- mxConstraint( expression=diag2vec(Oc%&%V)==Unv1, name="Var1" )

# Create Data Objects for Multiple Groups
dataMZ    <- mxData( observed=mzDataF, type="raw" )
dataDZ    <- mxData( observed=dzDataF, type="raw" )

# Create Expectation Objects for Multiple Groups
expMZ     <- mxExpectationNormal( covariance="expCovMZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )
expDZ     <- mxExpectationNormal( covariance="expCovDZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )
funML     <- mxFitFunctionML()

# Create Model Objects for Multiple Groups
pars      <- list(meanG, thinG, inc, threG, matI, invSD, matUnv, matOc,
pathA, pathC, pathE, covA, covC, covE, covP, corA, corC, corE)
modelMZ   <- mxModel( name="MZ", pars, covMZ, expCovMZ, dataMZ, expMZ, funML )
modelDZ   <- mxModel( name="DZ", pars, covDZ, expCovDZ, dataDZ, expDZ, funML )
multi     <- mxFitFunctionMultigroup( c("MZ","DZ") )

# Create Algebra for Variance Components
rowVC     <- rep('VC',nv)
colVC     <- rep(c('A','C','E','SA','SC','SE'),each=nv)
estVC     <- mxAlgebra( expression=cbind(A,C,E,A/V,C/V,E/V), name="VC", dimnames=list(rowVC,colVC))

# Create Confidence Interval Objects
ciACE     <- mxCI(c("VC[1,7]","VC[1,9]", "VC[1,11]", "VC[2,8]", "VC[2,10]", "VC[2,12]" ))

# Build Model with Confidence Intervals
modelACE  <- mxModel( "twoACEj", pars, var1, modelMZ, modelDZ, multi, estVC,ciACE) 

Subsequently, based on the below output I choose the ACE model over any submodel:

 base comparison ep  minus2LL    df       AIC       diffLL diffdf             p


1 twoACEj 11 32712.800 11178 10356.800 NA NA NA
2 twoACEj twoAEj 8 32722.252 11181 10360.252 9.4523951 3 2.3843227e-02
3 twoACEj twoCEj 8 32940.398 11181 10578.398 227.5982305 3 4.5718550e-49
4 twoACEj twoEj 5 34257.189 11184 11889.189 1544.3890445 6 0.0000000e+00

However, the lower bound of the CI of SA is 0.000, is this correct and does this make sense?

> fitEsts(fitACE)
mean_bmi t1_hs a_1_1 a_2_1 a_2_2 c_1_1 c_2_1 c_2_2 e_1_1 e_2_1 e_2_2
23.1373 2.2241 0.8065 -0.0811 2.8604 0.3302 -1.1363 0.0023 0.4904 -0.0410 1.9097
A A C C E E SA SA SC SC SE SE
VC 0.6505 -0.0654 0.1091 -0.3753 0.2405 -0.0201 0.6505 0.1419 0.1091 0.8144 0.2405 0.0436
VC -0.0654 8.1886 -0.3753 1.2913 -0.0201 3.6485 0.1419 0.6237 0.8144 0.0984 0.0436 0.2779
lbound estimate ubound
twoACEj.VC[1,7] 0.0000 0.6505 0.8507
twoACEj.VC[1,9] 0.0059 0.1091 0.6982
twoACEj.VC[1,11] 0.1067 0.2405 0.4640
twoACEj.VC[2,8] 0.5352 0.6237 0.7075
twoACEj.VC[2,10] 0.0196 0.0984 0.1809
twoACEj.VC[2,12] 0.2580 0.2779 0.2992

Moreover, the below CI details seem to "report a status code "nonzero gradient/red for all CIs. I am not sure how to interpret this?

CI details:
parameter value side fit mean_bmi t1_hs a_1_1 a_2_1 a_2_2 c_1_1
1 twoACEj.VC[1,7] 2.4850239e-05 lower 32716.641 23.140124 2.2065754 0.004985004 0.108697628 2.8455448 0.811215832
2 twoACEj.VC[1,7] 8.5065598e-01 upper 32716.650 23.128152 2.2105490 0.922310133 -0.299920357 2.8246636 0.157758943
3 twoACEj.VC[1,9] 5.8810741e-03 lower 32716.644 23.120816 2.2181279 0.882108184 -0.458473930 2.8182535 0.076688161
4 twoACEj.VC[1,9] 6.9816923e-01 upper 32716.673 23.138658 2.2183166 0.018547226 -2.493000405 1.4130896 0.835565224
5 twoACEj.VC[1,11] 1.0665508e-01 lower 32716.648 23.135947 2.2007855 0.867513790 -0.120282731 2.8472254 0.375186283
6 twoACEj.VC[1,11] 4.6401224e-01 upper 32716.658 23.136136 2.2373873 0.573177370 -0.078829212 2.8644263 0.455475777
7 twoACEj.VC[2,8] 5.3520770e-01 lower 32716.646 23.138107 2.2231490 0.826606517 -0.066884929 2.6534599 0.281284034
8 twoACEj.VC[2,8] 7.0746664e-01 upper 32716.645 23.131552 2.2216208 0.739852906 -0.257968239 3.0366597 0.479652614
9 twoACEj.VC[2,10] 1.9566469e-02 lower 32716.645 23.130503 2.2220530 0.731951748 -0.280816975 3.0194490 0.488947391
10 twoACEj.VC[2,10] 1.8086435e-01 upper 32716.647 23.138985 2.2244832 0.799906860 -0.053095311 2.6695519 0.313275027
11 twoACEj.VC[2,12] 2.5798502e-01 lower 32716.643 23.138385 2.2240622 0.794925301 -0.082005655 2.9456948 0.347706223
12 twoACEj.VC[2,12] 2.9923245e-01 upper 32716.643 23.136116 2.2281715 0.782260737 -0.081690070 2.7667635 0.358493602
c_2_1 c_2_2 e_1_1 e_2_1 e_2_2 method diagnostic statusCode
1 -0.54132925 1.0148719e+00 0.58472646 -0.0458843144 1.9226946 neale-miller-1997 success nonzero gradient/red
2 -1.18976326 4.9083101e-04 0.35278341 0.0193929090 1.9121737 neale-miller-1997 success nonzero gradient/red
3 -0.85016056 7.6804313e-01 0.46476239 0.0483915125 1.9088932 neale-miller-1997 success nonzero gradient/red
4 -0.47162022 1.0220297e+00 0.54907811 -0.0425895786 1.9092756 neale-miller-1997 success nonzero gradient/red
5 -0.93157210 6.9198941e-01 0.32658089 -0.0382025425 1.9088138 neale-miller-1997 success nonzero gradient/red
6 -0.84669292 7.4380220e-01 0.68118617 -0.0357496784 1.9094127 neale-miller-1997 success nonzero gradient/red
7 -1.37263512 6.8850457e-01 0.48744340 -0.0423087655 1.9386693 neale-miller-1997 success nonzero gradient/red
8 -0.54445165 -7.7042247e-05 0.47175324 -0.0136392463 1.8825139 neale-miller-1997 success nonzero gradient/red
9 -0.50536961 -2.7393269e-05 0.47452828 -0.0095315172 1.8977468 neale-miller-1997 success nonzero gradient/red
10 -1.27158725 8.8090563e-01 0.51186716 -0.0442595747 1.9251761 neale-miller-1997 success nonzero gradient/red
11 -1.08633458 2.5297820e-01 0.49718742 -0.0415448035 1.8574297 neale-miller-1997 success nonzero gradient/red
12 -1.04397757 5.3634622e-01 0.50947280 -0.0396417596 1.9642382 neale-miller-1997 success nonzero gradient/red

I appreciate your help!

Offline
Joined: 01/24/2014 - 12:15
The script runs without errors, but my first question is whether my adaptations are correct?

Probably? It would help to know specifically what changes you made, or at least the script as it was before you modified it. At a glance, I don't see anything wrong with your script (although creating inc and post-multiplying it by thinG isn't necessary in your case). Is there a reason you're not adjusting for the main effects of age and sex?

However, the lower bound of the CI of SA is 0.000, is this correct and does this make sense?

It's not obviously wrong. But, I bet that confidence limit is too narrow, since its lower limit would probably be negative if your MxModel allowed it. Your MxModel doesn't allow it because the numerator of "SA" is the square of the [1,1] element of a. We don't really recommend the Cholesky parameterization anymore, for this reason. I bet you're putting lower bounds of zero on the diagonal of a, too. To switch to the "direct-symmetric" parameterization, replace this,

# Create Matrices for Path Coefficients
pathA     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("a",nv), lbound=lbPaD, name="a" )
pathC     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("c",nv), lbound=lbPaD, name="c" )
pathE     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPeD, label=labLower("e",nv), lbound=lbPaD, name="e" )

# Create Algebra for Variance Comptwonts
covA      <- mxAlgebra( expression=a %*% t(a), name="A" )
covC      <- mxAlgebra( expression=c %*% t(c), name="C" )
covE      <- mxAlgebra( expression=e %*% t(e), name="E" )

, with this,

covA      <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE,  name="A" )
covC      <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE,  name="C" )
covE      <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE,  name="E" ) 

(I leave it up to you to decide if you want to use the same labels and start values as before).

In any event, you can verify the lower confidence limit by making a new MxModel that fixes "SA" to zero, and see if that worsens the -2logL by about 3.84. For instance:

fitACE2 <- mxModel(fitACE, mxConstraint(VC[1,7]==0))
fitACE2 <- mxRun(fitACE2)
#now compare change in fit

Moreover, the below CI details seem to "report a status code "nonzero gradient/red for all CIs. I am not sure how to interpret this?

Yes, but they also report "success", which is why all confidence limits are reported in non-verbose summary. There are two different ways that the OpenMx backend represents the confidence-limit search problem. The way that is default for NPSOL (which I assume you're using) and CSOLNP is described in Neale & Miller (1997). Note that OpenMx's implementation assumes that the second term in the RHS of Neale & Miller's Equation (1) is zero. As a result, the solution to the confidence-limit search will not necessarily "look like a minimum" in the sense of having a near-zero gradient.

SLSQP, by default, uses the other representation of the search problem, and on theoretical grounds alone, I feel comfortable recommending SLSQP as the first-choice optimizer for confidence intervals. omxRunCI() can run an MxModel to get confidence intervals without redoing the primary optimization, and uses SLSQP by default.