# NA CIs for Bivariate Model

2 posts / 0 new
Offline
Joined: 01/15/2024 - 08:20
NA CIs for Bivariate Model

Hi, I am trying to run a bivariate model with two variables and two covariates. However, I get NA confidence intervals for bivA, bivC and bivE. Can you help me solve this issue?

Here is the script:

### SELECT VARIABLES HERE
# Central configuration: the input CSV and the column names used throughout
# the script. All later sections should refer to these variables rather than
# hard-coding column names.
data_file <- "Cerebellum_volumes.csv"
# The two phenotypes entering the bivariate ACE (Cholesky) model.
variable_input1 <- "lsas_total"
variable_input2 <- "Vermis_VII"
# Covariates for the means model (used as definition variables in OpenMx).
cov1 <- "Total_Cerebel_Vol"
cov2 <- "age"

### Import data
# Fix: data_file was defined but never used and `dat` was referenced before
# it existed -- read the CSV here so the rest of the script has data.
dat <- read.csv(data_file)

# Drop entire twin pairs in which either member is missing (NA) on either
# phenotype, so the pair-wise reshape below only sees complete pairs.
# complete.cases() flags NA (including NaN) in either column.
complete_cases <- complete.cases(dat[[variable_input1]], dat[[variable_input2]])
rows_with_na <- which(!complete_cases)
pair_ids_with_na <- dat$pair_id[rows_with_na]
# NOTE(review): this filter keys on dat$pair_id while the sort/reshape below
# uses dat$pair_number -- confirm both columns exist and identify the same pairs.
dat <- dat[!(dat$pair_id %in% pair_ids_with_na), ]

# Take the square root of the two phenotypes and both covariates to reduce
# skew. Generalized to use the names selected at the top of the script
# (identical effect for the configured columns), so changing
# variable_input1/2 or cov1/cov2 needs editing in only one place.
# NOTE(review): sqrt() returns NaN for negative values -- confirm all four
# columns are non-negative (and that sqrt(age) is really intended).
for (v in c(variable_input1, variable_input2, cov1, cov2)) {
  dat[[v]] <- sqrt(dat[[v]])
}

# Sort rows based on twin pairs.
# Sort rows based on twin pairs so members of a pair are adjacent.
dat <- dat[order(dat$pair_number), ]

# Standardize phenotypes and covariates (z-scores).
# Fix: scale() returns a 1-column matrix; wrap in as.numeric() so the data
# frame keeps plain numeric columns (matrix columns can misbehave in
# fast.reshape()/mxData()). Values are unchanged.
dat[[variable_input1]] <- as.numeric(scale(dat[[variable_input1]]))
dat[[variable_input2]] <- as.numeric(scale(dat[[variable_input2]]))
dat$Total_Cerebel_Vol <- as.numeric(scale(dat$Total_Cerebel_Vol))
dat$age <- as.numeric(scale(dat$age))

###############################################################################
### Process bivariate correlations.

# Build the per-twin column names produced by fast.reshape(): "<name>1" and
# "<name>2" for twin 1 and twin 2 of each pair.
twin_variable_one  <- variable_input1
twin_variable_one1 <- paste0(twin_variable_one, "1")
twin_variable_one2 <- paste0(twin_variable_one, "2")
twin_variable_two  <- variable_input2
twin_variable_two1 <- paste0(twin_variable_two, "1")
twin_variable_two2 <- paste0(twin_variable_two, "2")

## Covariate names, per twin.
cov1_1 <- paste0(cov1, "1")
cov1_2 <- paste0(cov1, "2")
cov2_1 <- paste0(cov2, "1")
cov2_2 <- paste0(cov2, "2")

# Reshape to one row per pair, then split on zygosity (1 = MZ, 2 = DZ)
# for the first variable input.
twin_variables <- fast.reshape(dat, id = "pair_number",
                               varying = c(twin_variable_one, cov1, cov2))
mz_var1 <- subset(twin_variables, zyg == "1")[, c(twin_variable_one1, twin_variable_one2, cov1_1, cov2_1)]
dz_var1 <- subset(twin_variables, zyg == "2")[, c(twin_variable_one1, twin_variable_one2, cov1_1, cov2_1)]

# Split on MZ and DZ for second variable input.
# Reshape/split for the second variable input (this time keeping twin 2's
# covariate columns so datMZ/datDZ end up with both twins' covariates).
twin_variables <- fast.reshape(dat, id = "pair_number",
                               varying = c(twin_variable_two, cov1, cov2))
mz_var2 <- subset(twin_variables, zyg == "1")[, c(twin_variable_two1, twin_variable_two2, cov1_2, cov2_2)]
dz_var2 <- subset(twin_variables, zyg == "2")[, c(twin_variable_two1, twin_variable_two2, cov1_2, cov2_2)]

# Interleave so columns are: X1, Y1, X2, Y2, cov1_1, cov1_2, cov2_1, cov2_2.
# Columns 5-8 supply the definition variables referenced below via 'data.*'.
datMZ <- data.frame(mz_var1[1], mz_var2[1], mz_var1[2], mz_var2[2],
                    mz_var1[3], mz_var2[3], mz_var1[4], mz_var2[4])
datDZ <- data.frame(dz_var1[1], dz_var2[1], dz_var1[2], dz_var2[2],
                    dz_var1[3], dz_var2[3], dz_var1[4], dz_var2[4])

#### Set variable names (the four modeled phenotype columns).
# varNames <- c('X1','Y1','X2','Y2')
varNames <- names(datMZ[, 1:4])

# Capture printed output; closed with sink() at the end of the script
# (fix: the original never closed the sink).
sink(file = "mx_vermis_VII_lsas_total_cov_Total_Cerebel_Vol_age.txt")

### Observed correlation matrices
# Fix: wrap in print() so the output reaches the sink file even when this
# script is source()'d (top-level auto-printing is suppressed under source()).
### MZ
cat("MZ")
obsMZ <- cor(datMZ[, 1:4])
print(round(obsMZ, 2))
### DZ
cat("DZ")
obsDZ <- cor(datDZ[, 1:4])
print(round(obsDZ, 2))

################################################################################
### Set up OpenMx model
baseMod <- mxModel(
  'Base',
  ### Means are handled per-group via definition variables (see expMeans in
  ### the MZ/DZ submodels below).
  # mxMatrix(type='Full',nrow=1,ncol=4,free=T,values=c(0,0,0,0),labels=c('mX','mY','mX','mY'),name='expMeans'),
  ### Define ACE path coefficients (2x2 lower-triangular Cholesky factors,
  ### 3 free elements each).
  # Fix: starting every path at exactly 0 puts the optimizer on a flat,
  # symmetric point of the likelihood and is a classic cause of NA standard
  # errors and NA confidence intervals; use small nonzero starts instead.
  mxMatrix(type = 'Lower', ncol = 2, nrow = 2, free = c(TRUE, TRUE, TRUE),
           values = c(.3, .1, .3), labels = c('a11', 'a12', 'a22'), name = 'A'),
  mxMatrix(type = 'Lower', ncol = 2, nrow = 2, free = c(TRUE, TRUE, TRUE),
           values = c(.3, .1, .3), labels = c('c11', 'c12', 'c22'), name = 'C'),
  mxMatrix(type = 'Lower', ncol = 2, nrow = 2, free = c(TRUE, TRUE, TRUE),
           values = c(.3, .1, .3), labels = c('e11', 'e12', 'e22'), name = 'E'),
  ### Covariance components due to A, C and E
  mxAlgebra(A %*% t(A), name = 'A2'),
  mxAlgebra(C %*% t(C), name = 'C2'),
  mxAlgebra(E %*% t(E), name = 'E2'),
  ### Expected covariance matrices (MZ share A fully, DZ share half of A)
  mxAlgebra(rbind(cbind(A2 + C2 + E2, A2 + C2),
                  cbind(A2 + C2,      A2 + C2 + E2)), name = 'expCovMZ'),
  mxAlgebra(rbind(cbind(A2 + C2 + E2,     .5 %x% A2 + C2),
                  cbind(.5 %x% A2 + C2,   A2 + C2 + E2)), name = 'expCovDZ'),
  ### Regression coefficients for the covariates in the means model
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = TRUE, values =  .0001, labels = 'bXs', name = 'BXs'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = TRUE, values = -.0001, labels = 'bYs', name = 'BYs'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = TRUE, values =  .0001, labels = 'bXa', name = 'BXa'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = TRUE, values = -.0001, labels = 'bYa', name = 'BYa'),
  ### Estimates of interest
  # Total covariance
  mxAlgebra(A2 + C2 + E2, name = 'VAR'),
  ### Trait 1: standardized variance components
  mxAlgebra(A2[1, 1] / VAR[1, 1], name = 'h2_1'),   # heritability
  mxAlgebra(C2[1, 1] / VAR[1, 1], name = 'c2_1'),   # shared environment
  mxAlgebra(E2[1, 1] / VAR[1, 1], name = 'e2_1'),   # non-shared environment
  ### Trait 2: standardized variance components
  mxAlgebra(A2[2, 2] / VAR[2, 2], name = 'h2_2'),   # heritability (additive only)
  mxAlgebra(C2[2, 2] / VAR[2, 2], name = 'c2_2'),   # shared environment
  mxAlgebra(E2[2, 2] / VAR[2, 2], name = 'e2_2'),   # non-shared environment
  ### Between traits
  mxAlgebra(cov2cor(VAR)[2, 1], name = 'rph'),      # phenotypic correlation
  mxAlgebra(cov2cor(A2)[2, 1],  name = 'rA'),       # genetic correlation
  mxAlgebra(cov2cor(C2)[2, 1],  name = 'rC'),       # shared-env correlation
  mxAlgebra(cov2cor(E2)[2, 1],  name = 'rE'),       # non-shared-env correlation
  # Contribution of A, C and E to the phenotypic covariance.
  # NOTE(review): these ratios are not true proportions -- the components can
  # differ in sign, so values outside [0,1] are possible; interpret with care.
  mxAlgebra(A2[2, 1] / VAR[2, 1], name = 'bivA'),
  mxAlgebra(C2[2, 1] / VAR[2, 1], name = 'bivC'),
  mxAlgebra(E2[2, 1] / VAR[2, 1], name = 'bivE')
)

### Submodels holding the data and likelihoods
# NOTE(review): both twins' means use the definition variable 'data.age1'
# (twin 1's age only) and there is no free intercept in expMeans -- the
# variables were z-scored above so mean ~ 0, but confirm this is intended.
mzMod <- mxModel(
  'MZ',
  mxData(datMZ, type = 'raw'),
  # Definition variables (read per-row from columns 5-8 of datMZ)
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = FALSE, labels = 'data.Total_Cerebel_Vol1', name = 'S1'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = FALSE, labels = 'data.Total_Cerebel_Vol2', name = 'S2'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = FALSE, labels = 'data.age1', name = 'Age'),
  mxAlgebra(cbind(Base.BXs * S1 + Base.BXa * Age, Base.BYs * S1 + Base.BYa * Age,
                  Base.BXs * S2 + Base.BXa * Age, Base.BYs * S2 + Base.BYa * Age),
            name = 'expMeans'),
  mxExpectationNormal(means = 'expMeans', covariance = 'Base.expCovMZ', dimnames = varNames),
  mxFitFunctionML()
)
dzMod <- mxModel(
  'DZ',
  mxData(datDZ, type = 'raw'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = FALSE, labels = 'data.Total_Cerebel_Vol1', name = 'S1'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = FALSE, labels = 'data.Total_Cerebel_Vol2', name = 'S2'),
  mxMatrix(type = 'Full', ncol = 1, nrow = 1, free = FALSE, labels = 'data.age1', name = 'Age'),
  mxAlgebra(cbind(Base.BXs * S1 + Base.BXa * Age, Base.BYs * S1 + Base.BYa * Age,
                  Base.BXs * S2 + Base.BXa * Age, Base.BYs * S2 + Base.BYa * Age),
            name = 'expMeans'),
  mxExpectationNormal(means = 'expMeans', covariance = 'Base.expCovDZ', dimnames = varNames),
  mxFitFunctionML()
)

### Combine the submodels
aceMod <- mxModel(
  'ACEmodel', baseMod, mzMod, dzMod,
  # Add likelihoods together
  mxFitFunctionMultigroup(c('MZ', 'DZ')),
  # Profile likelihood confidence bounds
  mxCI(c('Base.h2_1', 'Base.c2_1', 'Base.e2_1',
         'Base.h2_2', 'Base.c2_2', 'Base.e2_2',
         'Base.rph', 'Base.rA', 'Base.rC', 'Base.rE',
         'Base.bivA', 'Base.bivC', 'Base.bivE'))
)

### Fit model.
# Fix: the original used intervals=F, so the profile-likelihood CIs requested
# via mxCI() were never computed and summary(...)$CI below was empty/NA.
aceModFit <- mxRun(aceMod, intervals = TRUE)

### Look at result
summary(aceModFit, verbose = TRUE)

### Create Wald-type confidence intervals (estimate, SE per quantity)
ests <- rbind(
  c(mxEval(Base.h2_1, aceModFit), mxSE(Base.h2_1, aceModFit)),
  c(mxEval(Base.c2_1, aceModFit), mxSE(Base.c2_1, aceModFit)),
  c(mxEval(Base.e2_1, aceModFit), mxSE(Base.e2_1, aceModFit)),
  c(mxEval(Base.h2_2, aceModFit), mxSE(Base.h2_2, aceModFit)),
  c(mxEval(Base.c2_2, aceModFit), mxSE(Base.c2_2, aceModFit)),
  c(mxEval(Base.e2_2, aceModFit), mxSE(Base.e2_2, aceModFit)),
  c(mxEval(Base.rph,  aceModFit), mxSE(Base.rph,  aceModFit)),
  c(mxEval(Base.rA,   aceModFit), mxSE(Base.rA,   aceModFit)),
  c(mxEval(Base.rC,   aceModFit), mxSE(Base.rC,   aceModFit)),
  c(mxEval(Base.rE,   aceModFit), mxSE(Base.rE,   aceModFit)),
  c(mxEval(Base.bivA, aceModFit), mxSE(Base.bivA, aceModFit)),
  c(mxEval(Base.bivC, aceModFit), mxSE(Base.bivC, aceModFit)),
  c(mxEval(Base.bivE, aceModFit), mxSE(Base.bivE, aceModFit))
)
# z = est/SE, two-sided normal p-value, and 95% Wald bounds.
out <- cbind(ests,
             ests[, 1] / ests[, 2],
             2 * pnorm(-abs(ests[, 1] / ests[, 2])),
             ests[, 1] - 1.96 * ests[, 2],
             ests[, 1] + 1.96 * ests[, 2])
colnames(out) <- c('Estimate', 'Std.error', 'z-val', 'p-val', 'lower bound', 'upper bound')
rownames(out) <- c('h2_1', 'c2_1', 'e2_1',
                   'h2_2', 'c2_2', 'e2_2',
                   'rph', 'rA', 'rC', 'rE',
                   'bivA', 'bivC', 'bivE')
print(round(out, 2))

### Profile likelihood intervals (computed because intervals=TRUE above)
print(round(summary(aceModFit)$CI[, c(2, 1, 3)], 2))

# Close the output capture opened above.
sink()
Offline
Joined: 03/01/2013 - 14:09
Include data

Hi

I really need to be able to run the script to reproduce your error, and I can't do that without data. While you may not wish to share the actual data you are working with, you could work around that by using mxGenerateData() to make datasets that are like your actual data: it takes the parameter estimates in the script and creates datasets with approximately the same means and covariance matrices as the model's expected values. If you follow this route, please check that the error emerges with the generated data as well (you may need to call mxGenerateData() on the fitted model).

Second, I am no fan of the statistics that you are calculating. They are supposed to be proportions of the covariance between the variables. Unfortunately (it's something that some of the textbooks mistakenly ignore) you can easily find that the A C and E covariance components are not all of the same sign. Thus you could get "proportions" that are well outside the 0 to 1 range. Pretending that they are proportions, even when they are all of the same sign is still not a good idea because it gives an artificial impression that their range is 0-1 when it is not. One possibility is to sum the unsigned components of the covariance, and then put the signs back in after calculating the proportions. That at least keeps things in the range of 0-1. For the most part I avoid using "proportion" or (ew, yuck even worse) "bivariate heritability" because they are not generally proportions. The phrase I would use is contribution to covariance, and keep them signed.

It is possible that the confidence intervals are not working well because the signs of the proportions could flip around a lot. So we can get a negative covariance contribution from A when: i) A11 only is negative, ii) A21 only is negative, iii) A22 only is negative, or iv) when any two are negative. Some of this issue is a consequence of using the Cholesky decomposition, which is no longer recommended. See https://pubmed.ncbi.nlm.nih.gov/30569348/ for some reasons why using the Cholesky is usually a bad idea.