NA CIs for Bivariate Model
Here is the script:
# Load required packages: OpenMx for the twin model, mets for fast.reshape()
library(OpenMx)
library(mets)
### SELECT VARIABLES HERE
data_file <- "Cerebellum_volumes.csv"
variable_input1 <- "lsas_total"
variable_input2 <- "Vermis_VII"
cov1 <- "Total_Cerebel_Vol"
cov2 <- "age"
### Import data
dat <- read.csv(file=data_file, header=TRUE, sep = ";")
# Identify rows with missing values (NA) in either variable_input1 or variable_input2
complete_cases <- complete.cases(dat[[variable_input1]], dat[[variable_input2]])
rows_with_nan <- which(!complete_cases)
pair_ids_with_nan <- dat$pair_id[rows_with_nan]
dat <- dat[!(dat$pair_id %in% pair_ids_with_nan), ]
# Square-root transform the selected variables and covariates
dat[[variable_input1]] <- sqrt(dat[[variable_input1]])
dat[[variable_input2]] <- sqrt(dat[[variable_input2]])
dat[[cov1]] <- sqrt(dat[[cov1]])
dat[[cov2]] <- sqrt(dat[[cov2]])
# Sort rows so members of a twin pair are adjacent before reshaping.
dat <- dat[order(dat$pair_number), ]
# Standardize; as.numeric() drops the matrix attributes that scale() attaches,
# which can otherwise cause trouble in fast.reshape() and mxData().
dat[[variable_input1]] <- as.numeric(scale(dat[[variable_input1]]))
dat[[variable_input2]] <- as.numeric(scale(dat[[variable_input2]]))
dat[[cov1]] <- as.numeric(scale(dat[[cov1]]))
dat[[cov2]] <- as.numeric(scale(dat[[cov2]]))
###############################################################################
### Process bivariate correlations.
twin_variable_one <- variable_input1
twin_variable_one1 <- sprintf("%s%s", twin_variable_one, "1")
twin_variable_one2 <- sprintf("%s%s", twin_variable_one, "2")
twin_variable_two <- variable_input2
twin_variable_two1 <- sprintf("%s%s", twin_variable_two, "1")
twin_variable_two2 <- sprintf("%s%s", twin_variable_two, "2")
## Covariate column names for each twin
cov1_1 <- sprintf("%s%s", cov1, "1")
cov1_2 <- sprintf("%s%s", cov1, "2")
cov2_1 <- sprintf("%s%s", cov2, "1")
cov2_2 <- sprintf("%s%s", cov2, "2")
# Split on MZ and DZ for first variable input.
twin_variables <- fast.reshape(dat, id="pair_number",varying=c(twin_variable_one,cov1,cov2))
mz_var1 <- subset(twin_variables, zyg=="1")[,c(twin_variable_one1,twin_variable_one2,cov1_1,cov2_1)]
dz_var1 <- subset(twin_variables, zyg=="2")[,c(twin_variable_one1,twin_variable_one2,cov1_1,cov2_1)]
# Split on MZ and DZ for second variable input.
twin_variables <- fast.reshape(dat, id="pair_number",varying=c(twin_variable_two,cov1,cov2))
mz_var2 <- subset(twin_variables, zyg=="1")[,c(twin_variable_two1,twin_variable_two2,cov1_2,cov2_2)]
dz_var2 <- subset(twin_variables, zyg=="2")[,c(twin_variable_two1,twin_variable_two2,cov1_2,cov2_2)]
datMZ <- data.frame(mz_var1[1], mz_var2[1], mz_var1[2], mz_var2[2], mz_var1[3], mz_var2[3], mz_var1[4], mz_var2[4])
datDZ <- data.frame(dz_var1[1], dz_var2[1], dz_var1[2], dz_var2[2], dz_var1[3], dz_var2[3], dz_var1[4], dz_var2[4])
#### Set variable names
#varNames <- c('X1','Y1','X2','Y2')
varNames <- names(datMZ[,1:4])
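# Redirect printed output to a results file (closed with sink() at the end of the script).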
sink(file="mx_vermis_VII_lsas_total_cov_Total_Cerebel_Vol_age.txt")
### Observed correlation matrices
### MZ
cat("MZ")
obsMZ <- cor( datMZ[,1:4] )
round( obsMZ , 2 )
### DZ
cat("DZ")
obsDZ <- cor( datDZ[,1:4] )
round( obsDZ , 2 )
################################################################################
### Set up OpenMx model
baseMod <- mxModel('Base',
### Means (handled via definition-variable regressions in the MZ/DZ submodels)
#mxMatrix(type='Full',nrow=1,ncol=4,free=T,values=c(0,0,0,0),labels=c('mX','mY','mX','mY'),name='expMeans'),
### Define ACE path coefficients (Cholesky: lower-triangular paths).
### Nonzero start values keep the initial expected covariance positive definite.
mxMatrix(type='Lower',ncol=2,nrow=2,free=TRUE,values=c(.6,.1,.6),labels=c('a11','a21','a22'),name='A'),
mxMatrix(type='Lower',ncol=2,nrow=2,free=TRUE,values=c(.6,.1,.6),labels=c('c11','c21','c22'),name='C'),
mxMatrix(type='Lower',ncol=2,nrow=2,free=TRUE,values=c(.6,.1,.6),labels=c('e11','e21','e22'),name='E'),
### Covariance matrices due to A, C and E
mxAlgebra(A%*%t(A),name='A2'),
mxAlgebra(C%*%t(C),name='C2'),
mxAlgebra(E%*%t(E),name='E2'),
### Expected covariance matrices
mxAlgebra(rbind(cbind(A2+C2+E2 , A2+C2 ),
cbind(A2+C2 , A2+C2+E2 )) , name='expCovMZ'),
mxAlgebra(rbind(cbind(A2+C2+E2 , .5%x%A2+C2 ),
cbind(.5%x%A2+C2 , A2+C2+E2 )) , name='expCovDZ'),
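# MZ pairs share all of the A and C covariance; DZ pairs share half of A
# (the .5%x%A2 term) and all of C.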
### Regression coefficients
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values= .0001,labels='bXs',name='BXs'),
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values=-.0001,labels='bYs',name='BYs'),
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values= .0001,labels='bXa',name='BXa'),
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values=-.0001,labels='bYa',name='BYa'),
### Estimates of interest
# Total Covariance
mxAlgebra( A2 + C2 + E2 , name='VAR' ),
### Trait 1:
# Heritability
mxAlgebra( A2[1,1]/VAR[1,1] , name='h2_1' ),
# Shared environment
mxAlgebra( C2[1,1]/VAR[1,1] , name='c2_1' ),
# Non-shared environment
mxAlgebra( E2[1,1]/VAR[1,1] , name='e2_1' ),
### Trait 2:
# Heritability
mxAlgebra( A2[2,2]/VAR[2,2] , name='h2_2' ),
# Shared environment
mxAlgebra( C2[2,2]/VAR[2,2] , name='c2_2' ),
# Non-shared environment
mxAlgebra( E2[2,2]/VAR[2,2] , name='e2_2' ),
### Between traits
# Phenotypic correlation (correlation between variable 1 and variable 2)
mxAlgebra( cov2cor(VAR)[2,1] , name='rph' ),
# Genetic correlation due to A
mxAlgebra( cov2cor(A2)[2,1] , name='rA' ),
# Correlation between shared environments
mxAlgebra( cov2cor(C2)[2,1] , name='rC' ),
# Correlation between non-shared environments
mxAlgebra( cov2cor(E2)[2,1] , name='rE' ),
# Bivariate heritability - i.e. how much of the covariance between the traits
# is explained by additive genetics?
mxAlgebra( A2[2,1]/VAR[2,1] , name='bivA' ),
# Bivariate C
mxAlgebra( C2[2,1]/VAR[2,1] , name='bivC' ),
# Bivariate E
mxAlgebra( E2[2,1]/VAR[2,1] , name='bivE' )
)
### Submodels for Data and likelihoods
mzMod <- mxModel('MZ',
# Data
mxData(datMZ , type='raw'),
# Definition variables
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol1',name='S1'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol2',name='S2'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.age1',name='Age'),
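# Means are modeled as regressions on the definition variables; age1 is
# used for both twins because twins in a pair share the same age.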
mxAlgebra(cbind( Base.BXs*S1+Base.BXa*Age, Base.BYs*S1+Base.BYa*Age
,Base.BXs*S2+Base.BXa*Age, Base.BYs*S2+Base.BYa*Age ),name='expMeans'),
mxExpectationNormal(means='expMeans',covariance='Base.expCovMZ',dimnames=varNames),
mxFitFunctionML()
)
dzMod <- mxModel('DZ',
# Data
mxData(datDZ , type='raw'),
# Definition variables
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol1',name='S1'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol2',name='S2'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.age1',name='Age'),
mxAlgebra(cbind( Base.BXs*S1+Base.BXa*Age, Base.BYs*S1+Base.BYa*Age
, Base.BXs*S2+Base.BXa*Age, Base.BYs*S2+Base.BYa*Age),name='expMeans'),
mxExpectationNormal(means='expMeans',covariance='Base.expCovDZ',dimnames=varNames),
mxFitFunctionML()
)
### Add the submodels together
aceMod <- mxModel( 'ACEmodel' , baseMod , mzMod , dzMod ,
# Add likelihoods together
mxFitFunctionMultigroup(c('MZ','DZ')),
# Profile likelihood confidence bounds
mxCI(c(
'Base.h2_1','Base.c2_1','Base.e2_1',
'Base.h2_2','Base.c2_2','Base.e2_2',
'Base.rph','Base.rA','Base.rC','Base.rE',
'Base.bivA','Base.bivC','Base.bivE'))
)
### Fit model (set intervals=TRUE to compute the profile-likelihood CIs requested via mxCI)
aceModFit <- mxRun( aceMod , intervals=FALSE )
### Look at result
summary(aceModFit, verbose=TRUE)
### Create Wald-type confidence intervals
ests <- rbind(
c(mxEval( Base.h2_1 , aceModFit ),mxSE( Base.h2_1 , aceModFit )),
c(mxEval( Base.c2_1 , aceModFit ),mxSE( Base.c2_1 , aceModFit )),
c(mxEval( Base.e2_1 , aceModFit ),mxSE( Base.e2_1 , aceModFit )),
c(mxEval( Base.h2_2 , aceModFit ),mxSE( Base.h2_2 , aceModFit )),
c(mxEval( Base.c2_2 , aceModFit ),mxSE( Base.c2_2 , aceModFit )),
c(mxEval( Base.e2_2 , aceModFit ),mxSE( Base.e2_2 , aceModFit )),
c(mxEval( Base.rph , aceModFit ),mxSE( Base.rph , aceModFit )),
c(mxEval( Base.rA , aceModFit ),mxSE( Base.rA , aceModFit )),
c(mxEval( Base.rC , aceModFit ),mxSE( Base.rC , aceModFit )),
c(mxEval( Base.rE , aceModFit ),mxSE( Base.rE , aceModFit )),
c(mxEval( Base.bivA , aceModFit ),mxSE( Base.bivA , aceModFit )),
c(mxEval( Base.bivC , aceModFit ),mxSE( Base.bivC , aceModFit )),
c(mxEval( Base.bivE , aceModFit ),mxSE( Base.bivE , aceModFit ))
)
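# Wald statistics: z = estimate/SE, two-sided p-value from the normal
# distribution, and 95% CI as estimate +/- 1.96*SE.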
out <- cbind( ests , ests[,1]/ests[,2] , 2*pnorm(-abs(ests[,1]/ests[,2])) , ests[,1]-1.96*ests[,2] , ests[,1]+1.96*ests[,2] )
colnames(out) <- c('Estimate','Std.error','z-val','p-val','lower bound','upper bound')
rownames(out) <- c('h2_1','c2_1','e2_1',
'h2_2','c2_2','e2_2',
'rph','rA','rC','rE',
'bivA','bivC','bivE')
round( out ,2 )
### Profile likelihood intervals (if intervals=TRUE above)
round(summary(aceModFit)$CI[,c(2,1,3)],2)
# Close the output file opened with sink() earlier
sink()
Include data
I really need to be able to run the script to reproduce your error, and I can't do that without data. While you may not wish to share the actual data you are working with, you can work around that with mxGenerateData(), which uses the parameter estimates in the script to simulate datasets with approximately the same means and covariance matrices as the model's expected values. If you follow this route, please check that the error emerges with the generated data as well (you may need to generate the data from the fitted model).
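For instance, here is a minimal sketch of that workflow, assuming the aceMod and aceModFit objects from your script (for a multigroup model, mxGenerateData() returns one simulated data frame per group that contains data):
# Simulate data from the fitted model.
simData <- mxGenerateData(aceModFit)
# Swap the simulated data into each submodel and refit.
simMod <- mxModel(aceMod,
                  mxModel(aceMod$MZ, mxData(simData$MZ, type='raw')),
                  mxModel(aceMod$DZ, mxData(simData$DZ, type='raw')))
simFit <- mxRun(simMod, intervals=TRUE)
# Check whether the NA confidence intervals reappear with the simulated data.
summary(simFit)$CI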
Second, I am no fan of the statistics you are calculating. They are supposed to be proportions of the covariance between the variables. Unfortunately (something some textbooks mistakenly ignore), the A, C, and E covariance components need not all be of the same sign, so you can easily get "proportions" well outside the 0 to 1 range. Even when the components do share a sign, calling them proportions is still not a good idea, because it gives the artificial impression that their range is 0-1 when it is not. One possibility is to sum the unsigned components of the covariance, and then put the signs back in after calculating the proportions; that at least keeps the magnitudes in the 0-1 range. For the most part I avoid "proportion" or (ew, yuck, even worse) "bivariate heritability", because these quantities are not generally proportions. The phrase I would use is contribution to covariance, and I would keep them signed.
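In mxAlgebra terms, one way to do that (a sketch only; the absCov and contribA/C/E names are mine) is to replace the bivA/bivC/bivE algebras with something like:
# Sum of the unsigned covariance components between the two traits
mxAlgebra( abs(A2[2,1]) + abs(C2[2,1]) + abs(E2[2,1]) , name='absCov' ),
# Signed contributions to covariance: each keeps the sign of its component,
# and the magnitudes are bounded between 0 and 1.
mxAlgebra( A2[2,1]/absCov , name='contribA' ),
mxAlgebra( C2[2,1]/absCov , name='contribC' ),
mxAlgebra( E2[2,1]/absCov , name='contribE' )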
It is possible that the confidence intervals are not working well because the signs of the "proportions" can flip around a lot. We can get a negative covariance contribution from A when: i) only A11 is negative, ii) only A21 is negative, iii) only A22 is negative, or iv) any two of them are negative. Some of this issue is a consequence of using the Cholesky decomposition, which is no longer recommended; see https://pubmed.ncbi.nlm.nih.gov/30569348/ for some reasons why using the Cholesky is usually a bad idea.
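One alternative is the direct-symmetric specification: estimate the variance components themselves rather than Cholesky paths. A sketch, assuming the rest of the model stays as written (the VA/VC/VE labels are mine):
# Estimate A2, C2, and E2 directly as symmetric matrices instead of
# deriving them from lower-triangular path matrices.
mxMatrix(type='Symm',nrow=2,ncol=2,free=TRUE,values=c(.4,.1,.4),
         labels=c('VA11','VA21','VA22'),name='A2'),
mxMatrix(type='Symm',nrow=2,ncol=2,free=TRUE,values=c(.2,.05,.2),
         labels=c('VC11','VC21','VC22'),name='C2'),
mxMatrix(type='Symm',nrow=2,ncol=2,free=TRUE,values=c(.4,.1,.4),
         labels=c('VE11','VE21','VE22'),name='E2'),
# The expCovMZ/expCovDZ algebras and the estimates of interest already refer
# to A2, C2, and E2, so they can remain unchanged.
Unlike the Cholesky, this does not force each component matrix to be positive semi-definite, which is part of what the paper above is getting at.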