Attachment | Size |
---|---|
R code [6] | 6.65 KB |
Hi,
I implemented two ways of calculating the genetic correlation between two phenotypes, estimated from twins (direct model of rg between A1 and A2, and Cholesky).
My two implementations are based on the path diagrams from H.H. Maes' slides 18 and 53 found at
https://vipbg.vcu.edu/media/course/HGEN619_2015/BivariateGeneticAnalysis2.pdf
I do not get the same estimates between the two methods (-0.26 vs 0.51). Below is the code with the reproducible fake data and results. The code is also attached.
If someone could help figuring out why the two methods differ, of help me find a bug in my code, I would appreciate. Many thanks for looking into this.
In the notation below, indices 1 and 2 refer to phenotype 1 and phenotype 2; indices x and y refer to twin 1 and twin 2.
Mathieu
#### library(OpenMx) ####### # creating a reproducible data set library( mvtnorm ) set.seed(12345) twincov<- 0.30 trcov<- 0.1 N<-200 sigmz<-matrix( c( 1, twincov, trcov, trcov, twincov, 1, trcov, trcov, trcov, trcov, 1, twincov, trcov, trcov, twincov,1 ), ncol=4 ) sigdz <-matrix( c( 1, twincov/2, trcov, trcov, twincov/2, 1, trcov, trcov, trcov, trcov, 1, twincov/2, trcov, trcov, twincov/2,1 ), ncol=4 ) tmz <- as.data.frame( rmvnorm(N , c(0,0,0,0) , sigmz ) ) names(tmz)<- c("phen1x", "phen1y", "phen2x", "phen2y") tdz <- as.data.frame(rmvnorm(N , c(0,0,0,0) , sigdz )) names(tdz)<- c("phen1x", "phen1y", "phen2x", "phen2y") dataMZ <- mxData( observed=tmz , type="raw" ) dataDZ <- mxData( observed=tdz , type="raw" ) ############## # defining variables and path # the following are common to the two methods of calculating rg # and will be re-used phenVars<- c("phen1x", "phen1y" ,"phen2x", "phen2y" ) # pheno 1 aceVars1<- c("A1x","C1x","E1x","A1y","C1y","E1y") # pheno 2 aceVars2<- c("A2x","C2x","E2x","A2y","C2y","E2y") latentVariances <- mxPath( from=c( aceVars1, aceVars2 ), arrows=2, free=FALSE, values=1 ) # means of latent variables latentMeans <- mxPath( from="one", to=c( aceVars1, aceVars2 ) , arrows=1, free=FALSE, values=0 ) # means of observed variables obsMeans1 <- mxPath( from="one", to=c("phen1x", "phen1y" ) , arrows=1, free=TRUE, values=0.01 , labels="mean1" ) obsMeans2 <- mxPath( from="one", to=c("phen2x", "phen2y" ) , arrows=1, free=TRUE, values=0.02 , labels="mean2" ) # path coefficients for twin 1, phen 1 path1x <- mxPath( from=c( "A1x","C1x","E1x" ), to="phen1x", arrows=1, free=TRUE, values=c( .12,.22,.32) , label=c( "a1","c1","e1") ) # path coefficients for twin 2, phen 1 path1y <- mxPath( from=c( "A1y","C1y","E1y" ), to="phen1y", arrows=1, free=TRUE, values=c( .12,.22,.32) , label=c( "a1","c1","e1") ) # path coefficients for twin 1, phen 2 path2x <- mxPath( from=c( "A2x","C2x","E2x" ), to="phen2x", arrows=1, free=TRUE, values=c( .18,.28,.38) , label=c( "a2","c2","e2") ) # path coefficients for twin 2, phen 2 path2y <- mxPath( from=c( "A2y","C2y","E2y" ), to="phen2y", arrows=1, free=TRUE, values= c( .18,.28,.38) , label=c( "a2","c2","e2") ) # covariance between C1x and C2y, etc covC1 <- mxPath( from="C1x", to="C1y", arrows=2, free=FALSE , values=1 ) covC2 <- mxPath( from="C2x", to="C2y", arrows=2, free=FALSE , values=1 ) # cov between A1x, A1y etc . index x is twin 1 , y in twin 2 covA1mz <- mxPath( from="A1x", to="A1y", arrows=2, free=FALSE, values=1 ) covA2mz <- mxPath( from="A2x", to="A2y", arrows=2, free=FALSE, values=1 ) covA1dz <- mxPath( from="A1x", to="A1y", arrows=2, free=FALSE, values=.5 ) covA2dz <- mxPath( from="A2x", to="A2y", arrows=2, free=FALSE, values=.5 ) commonPaths <- list( latentVariances, latentMeans,obsMeans1, obsMeans2, path1x, path1y, path2x, path2y , covC1, covC2 ) # END of paths common to both methods ###################################### ######## # method one: directly modeling the genetic correlation between A1 and A2 # index 1 refers to phenotype 1, 2 to phenotype 2 # x refers to twin 1, y to twin 2 pathRGx <- mxPath( from="A1x", to="A2x" , arrows=2 , free=TRUE , value= .5, label="rg" ) pathRGy <- mxPath( from="A1y", to="A2y" , arrows=2 , free=TRUE , value= .5, label="rg" ) # solution seems highly dependent on starting values (see below). # 0.5 was chosen as starting value because # that's close to the estimate of method 2 below # confidence intervals ci.rg <- mxCI("rg") # Defining the models the paths paths.rg <- list( pathRGx, pathRGy ) modelMZ.rg <- mxModel(model="MZrg", type="RAM", manifestVars=phenVars, latentVars=c(aceVars1, aceVars2 ) , commonPaths, paths.rg , covA1mz, covA2mz, dataMZ , ci.rg ) modelDZ.rg <- mxModel(model="DZrg", type="RAM", manifestVars=phenVars, latentVars=c(aceVars1, aceVars2 ) , commonPaths, paths.rg , covA1dz, covA2dz, dataDZ ) multi.rg <- mxFitFunctionMultigroup(c("MZrg","DZrg" ) ) model.rg <- mxModel(model="ACErg", modelMZ.rg, modelDZ.rg, multi.rg ) # check identification print( mxCheckIdentification( model.rg ) ) # the model is identified # Run Model fit.rg <- mxRun( model.rg , intervals=T ) print(fit.rg$output$confidenceIntervals ) # lbound estimate ubound #rg NA -0.2639032 NA # Note that the estimate is far from method 2 (cholesky, below). The sign is also different. # Also, why can't the bounds of the CI be calculated? # If "rg" gets a starting value of 0.25 instead of .5, we get # an estimate out of the intuitive range: (???) # lbound estimate ubound #rg NA 12.27812 NA ######## # method 2 : cholesky # from A1 to phen2 path21x<- mxPath( from="A1x", to="phen2x" , arrows=1 , free=TRUE , value=.15, label="a21" ) path21y<- mxPath( from="A1y", to="phen2y" , arrows=1 , free=TRUE , value=.15, label="a21" ) # confidence intervals cimat.cho <- mxMatrix(type="Full",nrow=3,ncol=1,free=T ,values=c( .12,.18,.15 ) , labels=c( "a1","a2","a21" ),name="aaa") cialg.cho <- mxAlgebra( aaa[1,1]*aaa[3,1]/sqrt( aaa[1,1]^2 * ( aaa[3,1]^2+aaa[2,1]^2 ) ), name="rg", dimnames=list( NULL ,NULL ) ) ci.cho <- mxCI("rg") # Defining the models the paths paths.cho <- list( path21x, path21y ) modelMZ.cho <- mxModel(model="MZcho", type="RAM", manifestVars=phenVars, latentVars=c(aceVars1, aceVars2 ) , commonPaths, paths.cho , covA1mz, covA2mz, dataMZ , cimat.cho,cialg.cho, ci.cho ) modelDZ.cho <- mxModel(model="DZcho", type="RAM", manifestVars=phenVars, latentVars=c(aceVars1, aceVars2 ) , commonPaths, paths.cho , covA1dz, covA2dz, dataDZ ) multi.cho <- mxFitFunctionMultigroup(c("MZcho","DZcho" ) ) model.cho <- mxModel(model="ACEcho", modelMZ.cho, modelDZ.cho, multi.cho ) # check identification print( mxCheckIdentification( model.cho ) ) # the model is indentified # Run Model fit.cho <- mxRun( model.cho , intervals=T ) print(fit.cho$output$confidenceIntervals ) # lbound estimate ubound #MZcho.rg[1,1] 0.1515944 0.5169708 1 # # Different estimate from above.