Bivariate model with binary and continuous variable

Posted on Wed, 06/20/2018 - 04:41

Kelsey Joined: 01/31/2017

Forums

Hi,
I am new to bivariate models and I have tried to adapt the twoACEj.R script by Hermine Maes from an ordinal to a binary variable in the below script. The script runs without errors, but my first question is whether my adaptations are correct?

# Select Binary Variables nth <- 1 # number of thresholds varso <- c('ht') # list of binary variables names nvo <- 1 # number of binary variables ntvo <- nvo*2 # number of total binary variables ordVars <- paste(varso,c(rep(1,nvo),rep(2,nvo)),sep="")


# Select Continuous Variables

varsc     <- c('bmi')                   # list of continuous variables names

nvc       <- 1                         # number of continuous variables

ntvc      <- nvc*2                     # number of total continuous variables

conVars   <- paste(varsc,c(rep(1,nvc),rep(2,nvc)),sep="")
# Select Variables for Analysis

vars      <- c('ht','bmi')              # list of variables names

nv        <- nvo+nvc                   # number of variables

ntv       <- nv*2                      # number of total variables

oc        <- c(1,0)                    # 0 for continuous, 1 for ordinal variables

selVars   <- paste(vars,c(rep(1,nv),rep(2,nv)),sep="")
# Select Data for Analysis

mzData    <- subset(twinData, zyg==1, selVars)

dzData    <- subset(twinData, zyg==2, selVars)

mzDataF   <- cbind(mxFactor( x=mzData[,ordVars], levels=c(0:nth)), mzData[,conVars])

dzDataF   <- cbind(mxFactor( x=dzData[,ordVars], levels=c(0:nth)), dzData[,conVars])
# Generate Descriptive Statistics

apply(mzData,2,myMean)

apply(dzData,2,myMean)

#?apply(mzData,2,myCor)

#?apply(dzData,2,myCor)
# Set Starting Values

frMV      <- c(FALSE, TRUE)             # free status for variables

frCvD     <- diag(frMV,ntv,ntv)        # lower bounds for diagonal of covariance matrix

frCvD[lower.tri(frCvD)] <- TRUE        # lower bounds for below diagonal elements

frCvD[upper.tri(frCvD)] <- TRUE        # lower bounds for above diagonal elements

frCv      <- matrix(as.logical(frCvD),4)

svMe      <- c(0,22)                   # start value for means

svPa      <- .4                        # start value for path coefficient

svPaD     <- vech(diag(svPa,nv,nv))    # start values for diagonal of covariance matrix

svPe      <- .8                        # start value for path coefficient for e

svPeD     <- vech(diag(svPe,nv,nv))    # start values for diagonal of covariance matrix

lbPa      <- .0001                     # start value for lower bounds

lbPaD     <- diag(lbPa,nv,nv)          # lower bounds for diagonal of covariance matrix

lbPaD[lower.tri(lbPaD)] <- -10         # lower bounds for below diagonal elements

lbPaD[upper.tri(lbPaD)] <- NA          # lower bounds for above diagonal elements

svLTh     <- -1.5                      # start value for first threshold

svITh     <- 1                         # start value for increments

svTh      <- matrix(rep(c(svLTh,(rep(svITh,nth-1)))),nrow=nth,ncol=nv)     # start value for thresholds

lbTh      <- matrix(rep(c(-3,(rep(0.001,nth-1))),nv),nrow=nth,ncol=nv)     # lower bounds for thresholds
# Create Labels

labMe     <- paste("mean", c('hs','bmi'),sep="_")

labTh     <- paste(paste("t",1:nth,sep=""),rep(varso,each=nth),sep="_")
# ------------------------------------------------------------------------------

# PREPARE MODEL
# ACE Model

# Create Algebra for expected Mean Matrices

meanG     <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=frMV, values=svMe, labels=labMe, name="meanG" )

thinG     <- mxMatrix( type="Full", nrow=nth, ncol=ntvo, free=TRUE, values=svTh, lbound=lbTh, labels=labTh, name="thinG" )

inc       <- mxMatrix( type="Lower", nrow=nth, ncol=nth, free=FALSE, values=1, name="inc" )

threG     <- mxAlgebra( expression= inc %*% thinG, name="threG" )
# Create Matrices for Path Coefficients

pathA     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("a",nv), lbound=lbPaD, name="a" )

pathC     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("c",nv), lbound=lbPaD, name="c" )

pathE     <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPeD, label=labLower("e",nv), lbound=lbPaD, name="e" )
# Create Algebra for Variance Comptwonts

covA      <- mxAlgebra( expression=a %*% t(a), name="A" )

covC      <- mxAlgebra( expression=c %*% t(c), name="C" )

covE      <- mxAlgebra( expression=e %*% t(e), name="E" )
# Create Algebra for expected Variance/Covariance Matrices in MZ & DZ twins

covP      <- mxAlgebra( expression= A+C+E, name="V" )

covMZ     <- mxAlgebra( expression= A+C, name="cMZ" )

covDZ     <- mxAlgebra( expression= 0.5%x%A+ C, name="cDZ" )

expCovMZ  <- mxAlgebra( expression= rbind( cbind(V, cMZ), cbind(t(cMZ), V)), name="expCovMZ" )

expCovDZ  <- mxAlgebra( expression= rbind( cbind(V, cDZ), cbind(t(cDZ), V)), name="expCovDZ" )
# Create Algebra for Standardization

matI      <- mxMatrix( type="Iden", nrow=nv, ncol=nv, name="I")

invSD     <- mxAlgebra( expression=solve(sqrt(I*V)), name="iSD")
# Calculate genetic and environmental correlations

corA      <- mxAlgebra( expression=solve(sqrt(I*A))%&%A, name ="rA" ) #cov2cor()

corC      <- mxAlgebra( expression=solve(sqrt(I*C))%&%C, name ="rC" )

corE      <- mxAlgebra( expression=solve(sqrt(I*E))%&%E, name ="rE" )
# Constrain Variance of Binary Variables

matUnv    <- mxMatrix( type="Unit", nrow=nvo, ncol=1, name="Unv1" )

matOc     <- mxMatrix( type="Full", nrow=1, ncol=nv, free=FALSE, values=oc, name="Oc" )

var1      <- mxConstraint( expression=diag2vec(Oc%&%V)==Unv1, name="Var1" )
# Create Data Objects for Multiple Groups

dataMZ    <- mxData( observed=mzDataF, type="raw" )

dataDZ    <- mxData( observed=dzDataF, type="raw" )
# Create Expectation Objects for Multiple Groups

expMZ     <- mxExpectationNormal( covariance="expCovMZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )

expDZ     <- mxExpectationNormal( covariance="expCovDZ", means="meanG", dimnames=selVars, thresholds="threG", threshnames=ordVars )

funML     <- mxFitFunctionML()
# Create Model Objects for Multiple Groups

pars      <- list(meanG, thinG, inc, threG, matI, invSD, matUnv, matOc,

                  pathA, pathC, pathE, covA, covC, covE, covP, corA, corC, corE)

modelMZ   <- mxModel( name="MZ", pars, covMZ, expCovMZ, dataMZ, expMZ, funML )

modelDZ   <- mxModel( name="DZ", pars, covDZ, expCovDZ, dataDZ, expDZ, funML )

multi     <- mxFitFunctionMultigroup( c("MZ","DZ") )
# Create Algebra for Variance Components

rowVC     <- rep('VC',nv)

colVC     <- rep(c('A','C','E','SA','SC','SE'),each=nv)

estVC     <- mxAlgebra( expression=cbind(A,C,E,A/V,C/V,E/V), name="VC", dimnames=list(rowVC,colVC))
# Create Confidence Interval Objects

ciACE     <- mxCI(c("VC[1,7]","VC[1,9]", "VC[1,11]", "VC[2,8]", "VC[2,10]", "VC[2,12]" ))

# Build Model with Confidence Intervals modelACE <- mxModel( "twoACEj", pars, var1, modelMZ, modelDZ, multi, estVC,ciACE)

Subsequently, based on the below output I choose the ACE model over any submodel:

base comparison ep minus2LL df AIC diffLL diffdf p
1 twoACEj 11 32712.800 11178 10356.800 NA NA NA
2 twoACEj twoAEj 8 32722.252 11181 10360.252 9.4523951 3 2.3843227e-02
3 twoACEj twoCEj 8 32940.398 11181 10578.398 227.5982305 3 4.5718550e-49
4 twoACEj twoEj 5 34257.189 11184 11889.189 1544.3890445 6 0.0000000e+00

However, the lower bound of the CI of SA is 0.000, is this correct and does this make sense?

> fitEsts(fitACE)
mean_bmi t1_hs a_1_1 a_2_1 a_2_2 c_1_1 c_2_1 c_2_2 e_1_1 e_2_1 e_2_2
23.1373 2.2241 0.8065 -0.0811 2.8604 0.3302 -1.1363 0.0023 0.4904 -0.0410 1.9097
A A C C E E SA SA SC SC SE SE
VC 0.6505 -0.0654 0.1091 -0.3753 0.2405 -0.0201 0.6505 0.1419 0.1091 0.8144 0.2405 0.0436
VC -0.0654 8.1886 -0.3753 1.2913 -0.0201 3.6485 0.1419 0.6237 0.8144 0.0984 0.0436 0.2779
lbound estimate ubound
twoACEj.VC[1,7] 0.0000 0.6505 0.8507
twoACEj.VC[1,9] 0.0059 0.1091 0.6982
twoACEj.VC[1,11] 0.1067 0.2405 0.4640
twoACEj.VC[2,8] 0.5352 0.6237 0.7075
twoACEj.VC[2,10] 0.0196 0.0984 0.1809
twoACEj.VC[2,12] 0.2580 0.2779 0.2992

Moreover, the below CI details seem to "report a status code "nonzero gradient/red for all CIs. I am not sure how to interpret this?

CI details:
parameter value side fit mean_bmi t1_hs a_1_1 a_2_1 a_2_2 c_1_1
1 twoACEj.VC[1,7] 2.4850239e-05 lower 32716.641 23.140124 2.2065754 0.004985004 0.108697628 2.8455448 0.811215832
2 twoACEj.VC[1,7] 8.5065598e-01 upper 32716.650 23.128152 2.2105490 0.922310133 -0.299920357 2.8246636 0.157758943
3 twoACEj.VC[1,9] 5.8810741e-03 lower 32716.644 23.120816 2.2181279 0.882108184 -0.458473930 2.8182535 0.076688161
4 twoACEj.VC[1,9] 6.9816923e-01 upper 32716.673 23.138658 2.2183166 0.018547226 -2.493000405 1.4130896 0.835565224
5 twoACEj.VC[1,11] 1.0665508e-01 lower 32716.648 23.135947 2.2007855 0.867513790 -0.120282731 2.8472254 0.375186283
6 twoACEj.VC[1,11] 4.6401224e-01 upper 32716.658 23.136136 2.2373873 0.573177370 -0.078829212 2.8644263 0.455475777
7 twoACEj.VC[2,8] 5.3520770e-01 lower 32716.646 23.138107 2.2231490 0.826606517 -0.066884929 2.6534599 0.281284034
8 twoACEj.VC[2,8] 7.0746664e-01 upper 32716.645 23.131552 2.2216208 0.739852906 -0.257968239 3.0366597 0.479652614
9 twoACEj.VC[2,10] 1.9566469e-02 lower 32716.645 23.130503 2.2220530 0.731951748 -0.280816975 3.0194490 0.488947391
10 twoACEj.VC[2,10] 1.8086435e-01 upper 32716.647 23.138985 2.2244832 0.799906860 -0.053095311 2.6695519 0.313275027
11 twoACEj.VC[2,12] 2.5798502e-01 lower 32716.643 23.138385 2.2240622 0.794925301 -0.082005655 2.9456948 0.347706223
12 twoACEj.VC[2,12] 2.9923245e-01 upper 32716.643 23.136116 2.2281715 0.782260737 -0.081690070 2.7667635 0.358493602
c_2_1 c_2_2 e_1_1 e_2_1 e_2_2 method diagnostic statusCode
1 -0.54132925 1.0148719e+00 0.58472646 -0.0458843144 1.9226946 neale-miller-1997 success nonzero gradient/red
2 -1.18976326 4.9083101e-04 0.35278341 0.0193929090 1.9121737 neale-miller-1997 success nonzero gradient/red
3 -0.85016056 7.6804313e-01 0.46476239 0.0483915125 1.9088932 neale-miller-1997 success nonzero gradient/red
4 -0.47162022 1.0220297e+00 0.54907811 -0.0425895786 1.9092756 neale-miller-1997 success nonzero gradient/red
5 -0.93157210 6.9198941e-01 0.32658089 -0.0382025425 1.9088138 neale-miller-1997 success nonzero gradient/red
6 -0.84669292 7.4380220e-01 0.68118617 -0.0357496784 1.9094127 neale-miller-1997 success nonzero gradient/red
7 -1.37263512 6.8850457e-01 0.48744340 -0.0423087655 1.9386693 neale-miller-1997 success nonzero gradient/red
8 -0.54445165 -7.7042247e-05 0.47175324 -0.0136392463 1.8825139 neale-miller-1997 success nonzero gradient/red
9 -0.50536961 -2.7393269e-05 0.47452828 -0.0095315172 1.8977468 neale-miller-1997 success nonzero gradient/red
10 -1.27158725 8.8090563e-01 0.51186716 -0.0442595747 1.9251761 neale-miller-1997 success nonzero gradient/red
11 -1.08633458 2.5297820e-01 0.49718742 -0.0415448035 1.8574297 neale-miller-1997 success nonzero gradient/red
12 -1.04397757 5.3634622e-01 0.50947280 -0.0396417596 1.9642382 neale-miller-1997 success nonzero gradient/red

I appreciate your help!

Replied on Mon, 06/25/2018 - 13:36

AdminRobK Joined: Jan 24, 2014

answers

The script runs without errors, but my first question is whether my adaptations are correct?

Probably? It would help to know specifically what changes you made, or at least the script as it was before you modified it. At a glance, I don't see anything wrong with your script (although creating inc and post-multiplying it by thinG isn't necessary in your case). Is there a reason you're not adjusting for the main effects of age and sex?

However, the lower bound of the CI of SA is 0.000, is this correct and does this make sense?

It's not obviously wrong. But, I bet that confidence limit is too narrow, since its lower limit would probably be negative if your MxModel allowed it. Your MxModel doesn't allow it because the numerator of "SA" is the square of the [1,1] element of a. We don't really recommend the Cholesky parameterization anymore, for this reason. I bet you're putting lower bounds of zero on the diagonal of a, too. To switch to the "direct-symmetric" parameterization, replace this,
# Create Matrices for Path Coefficients pathA <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("a",nv), lbound=lbPaD, name="a" ) pathC <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPaD, label=labLower("c",nv), lbound=lbPaD, name="c" ) pathE <- mxMatrix( type="Lower", nrow=nv, ncol=nv, free=TRUE, values=svPeD, label=labLower("e",nv), lbound=lbPaD, name="e" )

# Create Algebra for Variance Comptwonts covA <- mxAlgebra( expression=a %*% t(a), name="A" ) covC <- mxAlgebra( expression=c %*% t(c), name="C" ) covE <- mxAlgebra( expression=e %*% t(e), name="E" )
, with this,
covA <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, name="A" ) covC <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, name="C" ) covE <- mxMatrix( type="Symm", nrow=nv, ncol=nv, free=TRUE, name="E" )
(I leave it up to you to decide if you want to use the same labels and start values as before).

In any event, you can verify the lower confidence limit by making a new MxModel that fixes "SA" to zero, and see if that worsens the -2logL by about 3.84. For instance:
fitACE2 <- mxModel(fitACE, mxConstraint(VC[1,7]==0)) fitACE2 <- mxRun(fitACE2) #now compare change in fit

Moreover, the below CI details seem to "report a status code "nonzero gradient/red for all CIs. I am not sure how to interpret this?

Yes, but they also report "success", which is why all confidence limits are reported in non-verbose summary. There are two different ways that the OpenMx backend represents the confidence-limit search problem. The way that is default for NPSOL (which I assume you're using) and CSOLNP is described in Neale & Miller (1997). Note that OpenMx's implementation assumes that the second term in the RHS of Neale & Miller's Equation (1) is zero. As a result, the solution to the confidence-limit search will not necessarily "look like a minimum" in the sense of having a near-zero gradient.

SLSQP, by default, uses the other representation of the search problem, and on theoretical grounds alone, I feel comfortable recommending SLSQP as the first-choice optimizer for confidence intervals. omxRunCI() can run an MxModel to get confidence intervals without redoing the primary optimization, and uses SLSQP by default.

Log in or register to post comments

Log in or register to post comments

News

Recent Posts

Bivariate model with binary and continuous variable

answers