NA CIs for Bivariate Model

Posted by Tugce Yildiz (Joined: 01/15/2024)
Hi, I am trying to run a bivariate model with two variables and two covariates. However, I get NA for bivA, bivC and bivE. Can you help me solve this issue?

Here is the script:

### SELECT VARIABLES HERE
data_file <- "Cerebellum_volumes.csv"
variable_input1 <- "lsas_total"
variable_input2 <- "Vermis_VII"
cov1 <- "Total_Cerebel_Vol"
cov2 <- "age"

### Load required packages
library(mets)     # fast.reshape() used below comes from the 'mets' package
library(OpenMx)

### Import data
dat <- read.csv(file=data_file, header=TRUE, sep = ";")

# Identify rows with missing (NA) values in either variable_input1 or variable_input2
complete_cases <- complete.cases(dat[[variable_input1]], dat[[variable_input2]])
rows_with_nan <- which(!complete_cases)
pair_ids_with_nan <- dat$pair_id[rows_with_nan]
dat <- dat[!(dat$pair_id %in% pair_ids_with_nan), ]

#Take the square root of the variables
dat$lsas_total <- sqrt(dat$lsas_total)
dat$Vermis_VII <- sqrt(dat$Vermis_VII)
dat$Total_Cerebel_Vol <- sqrt(dat$Total_Cerebel_Vol)
dat$age <- sqrt(dat$age)

# Sort rows based on twin pairs.
dat <- dat[order(dat$pair_number), ]

dat[[variable_input1]] <- scale(dat[[variable_input1]])
dat[[variable_input2]] <- scale(dat[[variable_input2]])
dat$Total_Cerebel_Vol <- scale(dat$Total_Cerebel_Vol)
dat$age <- scale(dat$age)

###############################################################################
### Process bivariate correlations.
twin_variable_one <- variable_input1
twin_variable_one1 <- sprintf("%s%s", twin_variable_one, "1")
twin_variable_one2 <- sprintf("%s%s", twin_variable_one, "2")
twin_variable_two <- variable_input2
twin_variable_two1 <- sprintf("%s%s", twin_variable_two, "1")
twin_variable_two2 <- sprintf("%s%s", twin_variable_two, "2")
##covs
cov1_1 <- sprintf("%s%s", cov1, "1")
cov1_2 <- sprintf("%s%s", cov1, "2")
cov2_1 <- sprintf("%s%s", cov2, "1")
cov2_2 <- sprintf("%s%s", cov2, "2")

# Split on MZ and DZ for first variable input.
twin_variables <- fast.reshape(dat, id="pair_number",varying=c(twin_variable_one,cov1,cov2))
mz_var1 <- subset(twin_variables, zyg=="1")[,c(twin_variable_one1,twin_variable_one2,cov1_1,cov2_1)]
dz_var1 <- subset(twin_variables, zyg=="2")[,c(twin_variable_one1,twin_variable_one2,cov1_1,cov2_1)]

# Split on MZ and DZ for second variable input.
twin_variables <- fast.reshape(dat, id="pair_number",varying=c(twin_variable_two,cov1,cov2))
mz_var2 <- subset(twin_variables, zyg=="1")[,c(twin_variable_two1,twin_variable_two2,cov1_2,cov2_2)]
dz_var2 <- subset(twin_variables, zyg=="2")[,c(twin_variable_two1,twin_variable_two2,cov1_2,cov2_2)]

datMZ <- data.frame(mz_var1[1], mz_var2[1], mz_var1[2], mz_var2[2], mz_var1[3], mz_var2[3], mz_var1[4], mz_var2[4])
datDZ <- data.frame(dz_var1[1], dz_var2[1], dz_var1[2], dz_var2[2], dz_var1[3], dz_var2[3], dz_var1[4], dz_var2[4])

####Set variable names
#varNames <- c('X1','Y1','X2','Y2')
varNames <- names(datMZ[,1:4])
sink(file="mx_vermis_VII_lsas_total_cov_Total_Cerebel_Vol_age.txt")
### Observed correlation matrices
### MZ
cat("MZ")
obsMZ <- cor( datMZ[,1:4] )
round( obsMZ , 2 )
### DZ
cat("DZ")
obsDZ <- cor( datDZ[,1:4] )
round( obsDZ , 2 )
################################################################################
### Set up OpenMx model
baseMod <- mxModel('Base',
### Means
#mxMatrix(type='Full',nrow=1,ncol=4,free=T,values=c(0,0,0,0),labels=c('mX','mY','mX','mY'),name='expMeans'),

### Define ACE-path coefficients
mxMatrix(type='Lower',ncol=2,nrow=2,free=c(TRUE,TRUE,TRUE),values=c(.0,.0,.0),labels=c('a11','a12','a22'),name='A'),
mxMatrix(type='Lower',ncol=2,nrow=2,free=c(TRUE,TRUE,TRUE),values=c(.0,.0,.0),labels=c('c11','c12','c22'),name='C'),
mxMatrix(type='Lower',ncol=2,nrow=2,free=c(TRUE,TRUE,TRUE),values=c(.0,.0,.0),labels=c('e11','e12','e22'),name='E'),

### Correlation matrices due to A, C and E
mxAlgebra(A%*%t(A),name='A2'),
mxAlgebra(C%*%t(C),name='C2'),
mxAlgebra(E%*%t(E),name='E2'),

### Expected covariance matrices
mxAlgebra(rbind(cbind(A2+C2+E2 , A2+C2 ),
cbind(A2+C2 , A2+C2+E2 )) , name='expCovMZ'),
mxAlgebra(rbind(cbind(A2+C2+E2 , .5%x%A2+C2 ),
cbind(.5%x%A2+C2 , A2+C2+E2 )) , name='expCovDZ'),

### Regression coefficients
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values= .0001,labels='bXs',name='BXs'),
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values=-.0001,labels='bYs',name='BYs'),
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values= .0001,labels='bXa',name='BXa'),
mxMatrix(type='Full',ncol=1,nrow=1,free=TRUE,values=-.0001,labels='bYa',name='BYa'),

### Estimates of interest
# Total Covariance
mxAlgebra( A2 + C2 + E2 , name='VAR' ),
### Trait 1:
# Heritability
mxAlgebra( A2[1,1]/VAR[1,1] , name='h2_1' ),
# Shared environment
mxAlgebra( C2[1,1]/VAR[1,1] , name='c2_1' ),
# Non-shared environment
mxAlgebra( E2[1,1]/VAR[1,1] , name='e2_1' ),
### Trait 2:
# Heritability - Additive only
mxAlgebra( A2[2,2]/VAR[2,2] , name='h2_2' ),
# Shared environment
mxAlgebra( C2[2,2]/VAR[2,2] , name='c2_2' ),
# Non-shared environment
mxAlgebra( E2[2,2]/VAR[2,2] , name='e2_2' ),
### Between traits
# Phenotypic correlation (correlation between variable 1 and variable 2)
mxAlgebra( cov2cor(VAR)[2,1] , name='rph' ),
# Genetic correlation due to A
mxAlgebra( cov2cor(A2)[2,1] , name='rA' ),
# Correlation between shared environments
mxAlgebra( cov2cor(C2)[2,1] , name='rC' ),
# Correlation between non-shared environments
mxAlgebra( cov2cor(E2)[2,1] , name='rE' ),
# Bivariate heritability - i.e. how much of the correlation between traits is
# explained by additive genetics?
mxAlgebra( A2[2,1]/VAR[2,1] , name='bivA' ),
# Bivariate C
mxAlgebra( C2[2,1]/VAR[2,1] , name='bivC' ),
# Bivariate E
mxAlgebra( E2[2,1]/VAR[2,1] , name='bivE' )
)
### Submodels for Data and likelihoods
mzMod <- mxModel('MZ',
# Data
mxData(datMZ , type='raw'),
# Definition variables
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol1',name='S1'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol2',name='S2'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.age1',name='Age'),
mxAlgebra(cbind( Base.BXs*S1+Base.BXa*Age, Base.BYs*S1+Base.BYa*Age
,Base.BXs*S2+Base.BXa*Age, Base.BYs*S2+Base.BYa*Age ),name='expMeans'),
mxExpectationNormal(means='expMeans',covariance='Base.expCovMZ',dimnames=varNames),
mxFitFunctionML()
)
dzMod <- mxModel('DZ',
# Data
mxData(datDZ , type='raw'),
# Definition variables
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol1',name='S1'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.Total_Cerebel_Vol2',name='S2'),
mxMatrix(type='Full',ncol=1,nrow=1,free=FALSE,labels='data.age1',name='Age'),
mxAlgebra(cbind( Base.BXs*S1+Base.BXa*Age, Base.BYs*S1+Base.BYa*Age
, Base.BXs*S2+Base.BXa*Age, Base.BYs*S2+Base.BYa*Age),name='expMeans'),
mxExpectationNormal(means='expMeans',covariance='Base.expCovDZ',dimnames=varNames),
mxFitFunctionML()
)
### Add the submodels together
aceMod <- mxModel( 'ACEmodel' , baseMod , mzMod , dzMod ,
# Add likelihoods together
mxFitFunctionMultigroup(c('MZ','DZ')),

# Profile likelihood confidence bounds
mxCI(c(
'Base.h2_1','Base.c2_1','Base.e2_1',
'Base.h2_2','Base.c2_2','Base.e2_2',
'Base.rph','Base.rA','Base.rC','Base.rE',
'Base.bivA','Base.bivC','Base.bivE'))
)
### Fit model
aceModFit <- mxRun( aceMod , intervals=F )
### Look at result
summary(aceModFit, verbose = T)
### Create Wald-type confidence intervals
ests <- rbind(
c(mxEval( Base.h2_1 , aceModFit ),mxSE( Base.h2_1 , aceModFit )),
c(mxEval( Base.c2_1 , aceModFit ),mxSE( Base.c2_1 , aceModFit )),
c(mxEval( Base.e2_1 , aceModFit ),mxSE( Base.e2_1 , aceModFit )),
c(mxEval( Base.h2_2 , aceModFit ),mxSE( Base.h2_2 , aceModFit )),
c(mxEval( Base.c2_2 , aceModFit ),mxSE( Base.c2_2 , aceModFit )),
c(mxEval( Base.e2_2 , aceModFit ),mxSE( Base.e2_2 , aceModFit )),
c(mxEval( Base.rph , aceModFit ),mxSE( Base.rph , aceModFit )),
c(mxEval( Base.rA , aceModFit ),mxSE( Base.rA , aceModFit )),
c(mxEval( Base.rC , aceModFit ),mxSE( Base.rC , aceModFit )),
c(mxEval( Base.rE , aceModFit ),mxSE( Base.rE , aceModFit )),
c(mxEval( Base.bivA , aceModFit ),mxSE( Base.bivA , aceModFit )),
c(mxEval( Base.bivC , aceModFit ),mxSE( Base.bivC , aceModFit )),
c(mxEval( Base.bivE , aceModFit ),mxSE( Base.bivE , aceModFit ))
)
out <- cbind( ests , ests[,1]/ests[,2] , 2*pnorm(-abs(ests[,1]/ests[,2])) , ests[,1]-1.96*ests[,2] , ests[,1]+1.96*ests[,2] )
colnames(out) <- c('Estimate','Std.error','z-val','p-val','lower bound','upper bound')
rownames(out) <- c('h2_1','c2_1','e2_1',
'h2_2','c2_2','e2_2',
'rph','rA','rC','rE',
'bivA','bivC','bivE')
round( out ,2 )
### Profile likelihood intervals (if intervals=T above)
round(summary(aceModFit)$CI[,c(2,1,3)],2)

Replied on Fri, 03/08/2024 - 10:11 by AdminNeale (Joined: 03/01/2013)

Hi

I really need to be able to run the script to reproduce your error, and I can't do that without data. If you would rather not share the actual data you are working with, you could work around that by using mxGenerateData() to make datasets that are like yours: it uses the parameter values in the script (or from a fitted model) to simulate data with approximately the same means and covariance matrices as the expected values from the model. If you follow this route, please check that the error also emerges with the generated data (you may need to generate the data from the fitted model).
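
A rough sketch of that workflow, reusing the aceModFit object from your script (I have not run this against your data, so treat it as illustrative only):

# Simulate data from the fitted model: mxGenerateData() draws data with
# approximately the model-implied means and covariances, and with
# returnModel=TRUE it returns a copy of the model containing the simulated
# data in place of the real data.
simMod <- mxGenerateData(aceModFit, returnModel=TRUE)
simFit <- mxRun(simMod, intervals=TRUE)
summary(simFit)   # check whether the NAs for bivA/bivC/bivE reappear
# If they do, the simulated datasets (e.g. simMod$MZ$data$observed and
# simMod$DZ$data$observed) plus the script are enough to reproduce the
# problem without sharing the real measurements.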

Second, I am no fan of the statistics that you are calculating. They are supposed to be proportions of the covariance between the variables. Unfortunately (something some textbooks mistakenly ignore), the A, C and E covariance components are not necessarily all of the same sign, so you can easily end up with "proportions" that are well outside the 0 to 1 range. Pretending that they are proportions, even when they do all have the same sign, is still not a good idea, because it gives the artificial impression that their range is 0-1 when it is not. One possibility is to sum the unsigned components of the covariance and then put the signs back in after calculating the proportions; that at least keeps things in the 0-1 range. For the most part I avoid the term "proportion" or (ew, yuck, even worse) "bivariate heritability", because these quantities are not generally proportions. The phrase I would use is "contribution to covariance", and I would keep the contributions signed.
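
If you do want something that behaves like a proportion, one way to write that suggestion into the 'Base' model above is roughly the following (illustrative only; the names absCov/contribA/contribC/contribE are mine, not established conventions, and these would replace the bivA/bivC/bivE algebras):

# Sum the unsigned A, C and E contributions to the cross-trait covariance,
# then divide each signed contribution by that sum, so the magnitudes sum
# to 1 while the signs of the individual contributions are preserved.
mxAlgebra( abs(A2[2,1]) + abs(C2[2,1]) + abs(E2[2,1]) , name='absCov' ),
mxAlgebra( A2[2,1] / absCov , name='contribA' ),
mxAlgebra( C2[2,1] / absCov , name='contribC' ),
mxAlgebra( E2[2,1] / absCov , name='contribE' )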

It is possible that the confidence intervals are not working well because the signs of the proportions can flip around a lot. For example, we can get a negative covariance contribution from A when: i) only A11 is negative, ii) only A21 is negative, iii) only A22 is negative, or iv) any two of them are negative. Some of this issue is a consequence of using the Cholesky decomposition, which is no longer recommended; see https://pubmed.ncbi.nlm.nih.gov/30569348/ for some reasons why using the Cholesky is usually a bad idea.
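
For what a non-Cholesky specification could look like, a direct-symmetric version of the Base model estimates the A, C and E covariance matrices themselves rather than their Cholesky factors, roughly along these lines (a sketch only, with illustrative starting values and labels of my own choosing; these would replace the Lower A/C/E matrices and the A2/C2/E2 algebras in your script):

# Directly estimate the 2x2 A, C and E covariance matrices as free symmetric
# matrices, so cross-trait components can go negative without the sign
# entanglement that Cholesky path coefficients introduce.
mxMatrix(type='Symm', nrow=2, ncol=2, free=TRUE, values=c(.2,.1,.2),
         labels=c('VA11','VA21','VA22'), name='A2'),
mxMatrix(type='Symm', nrow=2, ncol=2, free=TRUE, values=c(.2,.1,.2),
         labels=c('VC11','VC21','VC22'), name='C2'),
mxMatrix(type='Symm', nrow=2, ncol=2, free=TRUE, values=c(.6,.1,.6),
         labels=c('VE11','VE21','VE22'), name='E2'),
# The expCovMZ and expCovDZ algebras from your script can stay as they are,
# since they are written in terms of A2, C2 and E2.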