# Program:  Longitudinal Twin Analysis
#  Author:  Jamie Newsome
#    Date:  07 18 2010
#
# Longitudinal Twin Saturated model to estimate means and (co)variances across multiple groups
# Longitudinal Cholesky ACE model to estimate genetic and environmental sources of variance 
# Longitudinal Simplex Model and Linear Growth Curve Model
# Matrix style model input - Raw data input
#
# -------------------------------------------------------------------------

require (OpenMx)
source ("http://www.vipbg.vcu.edu/~vipbg/Tc24/GenEpiHelperFunctions.R")

# Load Data
# ----------------------------------------------------------------------------
mydata <- read.csv("C:\\Users\\Jamie\\Documents\\IRT_EXT.csv")

# Describe Data
# ----------------------------------------------------------------------------
require (psych)
describe (mydata)

# Prepare Data
# ----------------------------------------------------------------------------
allVars <- c('zyg','GENDER_1','GENDER_2',
'FKTOT_1','FKTOT_2','SFTOT_1','SFTOT_2',
'STTOT_1','STTOT_2','FTOT_1','FTOT_2',
'SETOT_1','SETOT_2')
# Total IRT scores Fall K, Spring 1, Spring 3, Spring 5, Spring 8

selVars <-c('FKTOT_1','SFTOT_1','STTOT_1','FTOT_1','SETOT_1','FKTOT_2','SFTOT_2','STTOT_2','FTOT_2','SETOT_2')
Vars <- c('FKTOT','SFTOT','STTOT','FTOT','SETOT')
# Latent Growth Curve at Kindergarten, First, Third, Fifth, and Eighth Grade
LatentVars <- c('Int','Slope')
selVars2 <- c('GENDER_1','GENDER_2')
# Includes sex

# Extract MZ and DZ Data Sets
# ----------------------------------------------------------------------------
mzData <- subset(mydata, zyg==2, selVars) # Male and Female MZ twin pairs
dzData <- subset(mydata, zyg==1, selVars) # Male and Female DZ twin pairs
mzDefs <- subset(mydata, zyg==2, selVars2)
dzDefs <- subset(mydata, zyg==1, selVars2)

summary(mzData)
summary(dzData)
summary(mzDefs)
summary(dzDefs)
dim(mzDefs)
dim(mzData)

nv  <- 5     # Number of variables includes one measure at each wave
nf  <- 2     # Number of factors includes Latent Intercept & Slope
ntv <- nv*2  # Number of twin variable is number of vars by 2


# Print Descriptive Statistics
# -----------------------------------------------------------------------
summary(mzData)
colMeans(mzData,na.rm=TRUE)
cov(mzData,use="complete")
summary(dzData)
colMeans(dzData,na.rm=TRUE)
cov(dzData,use="complete")


# Fit Latent Growth ACE Model with Raw Continuous Data and Matrices Input
# Includes sex effects (Betas) on latent Int & Slope means
# -----------------------------------------------------------------------

lgcACEModel <- mxModel("lgcACE",
    mxModel("ACE",
	 # Matrices X, Y, and Z to store a, c, and e path coefficients for Interecept and Slope latent factors
		mxMatrix( type="Lower", nrow=nf, ncol=nf, free=T, values=0.2, name="a" ), # Genetic effects on Int & Slope 
		mxMatrix( type="Lower", nrow=nf, ncol=nf, free=T, values=0.2, name="c" ), # Common env' effects on Int & Slope 
		mxMatrix( type="Lower", nrow=nf, ncol=nf, free=T, values=0.2, name="e" ), # Non-shared env' effects on Int & Slope 

	 # Matrices T, U, and V to store a, c, and e path coefficients for residuals of observed phenotypes
		mxMatrix( type="Diag",  nrow=nv, ncol=nv, free=T, values=0.2, name="as" ), # Specific genetic effects on obs phenotypes
		mxMatrix( type="Diag",  nrow=nv, ncol=nv, free=T, values=0.2, name="cs" ), # Specific common env effects on obs phenotypes
		mxMatrix( type="Diag",  nrow=nv, ncol=nv, free=T, values=0.2, name="es" ), # Specific non-shared env' effects

	 # Factor loading matrix of Intercept and Slope on observed phenotypes
        #mxMatrix( type="Full", nrow=nv, ncol=2, free=F, values=c(rep(1,nv), 0:nv-1), name="F" ),  # Factor loadings from
   	    mxMatrix( type="Full", nrow=nv, ncol=2, free=F, values=c(1,1,1,0,1,2), name="F" ),         # Int & Slope to obs phenotypes
#MATRIX OF THE FACTOR LOADINGS IS ABOVE, DOWN COLUMN FIRST, THEN ACROSS, 0, 1, 2 IS FOR THE CHANGE ACROSS TIME         
   	    
	 # Total A, C, and E variance components (latents factors + residuals)
	    mxAlgebra( expression= F %&% (a %*% t(a)) + as %*% t(as), name="A" ), # Total genetic variance/covariance
	    mxAlgebra( expression= F %&% (c %*% t(c)) + cs %*% t(cs), name="C" ), # Total shared env' variance/covariance 
	    mxAlgebra( expression= F %&% (e %*% t(e)) + es %*% t(es), name="E" ), # Total non-shared env'  variance/covariance  
	    mxAlgebra( expression= A+C+E, name="Rt" ), # Total variance
        mxMatrix( type="Iden", nrow=nv, ncol=nv, name="It"), # Identity matrix (removes off diagonals)
	    mxAlgebra( expression=solve(sqrt(It*Rt)), name="SDt"), # Standardized deviations (inverse)

	    mxAlgebra( expression= F %&% (a %*% t(a)), name="LatentA" ), # Total genetic variance/covariance Int & Slope
	    mxAlgebra( expression= F %&% (c %*% t(c)), name="LatentC" ), # Total shared env' variance/covariance Int & Slope 
	    mxAlgebra( expression= F %&% (e %*% t(e)), name="LatentE" ), # Total non-shared env'  variance/covariance Int & Slope  
	    mxAlgebra( expression= as %*% t(as), name="ResA" ), # Total genetic variance/covariance Residuals
	    mxAlgebra( expression= cs %*% t(cs), name="ResC" ), # Total shared env' variance/covariance Residuals 
	    mxAlgebra( expression= es %*% t(es), name="ResE" ), # Total non-shared env'  variance/covariance Residuals 

	 # Slope & Intercept (co)variance + standard deviations
	    mxAlgebra( expression=a %*% t(a), name="Al" ), # Slope & Intercept genetic (co)variance
		mxAlgebra( expression=c %*% t(c), name="Cl" ),
		mxAlgebra( expression=e %*% t(e), name="El" ),
	    mxAlgebra( expression= Al+Cl+El, name="Rf" ), # Total Slope & Intercept  (co)variance
	    mxMatrix( type="Iden", nrow=nf, ncol=nf, name="Ident"),
	    mxAlgebra( expression=(solve(sqrt(Ident*Rf)) ), name="SDf"),  # Inverse of the standard deviations 

     # Means & Betas for Intercept and Slope     
        mxMatrix( type="Full", nrow=2, ncol=1, free=T, values=c(1,0.1), labels=c("Im","Sm"), name="Mean" ), # Int & Slope means
		mxMatrix( type="Full", nrow=2, ncol=1, free=F, values=0,     labels=c("Bi","Bs"), name="Beta" ), # Sex effects on means


#SET THE FREE TO FALSE AND VALUES TO 0 IN ORDER TO RUN THE MODEL WITHOUT SEX - THIS SETS THE BETAS TO 0
 
#2X1 MATRIX FOR INTERCEPT AND SLOPE

     # Algebra for expected variance/covariance matrice for MZs and DZs 
   		mxAlgebra(expression=
   					 (rbind(cbind(A+C+E, A+C),
   					        cbind(A+C, A+C+E))), name="expCovMZ" ),
   		mxAlgebra(expression=
   					 (rbind(cbind(A+C+E, 0.5%x%A+C),
   					        cbind(0.5%x%A+C, A+C+E))), name="expCovDZ" )
     ),



#MZ AND DZ EXPECTED MATRICES

    # Algebra for expected Int & Slope means + sex effects (betas) for MZs
		mxModel("MZ",
	     mxData(data.frame(mzData,mzDefs), type="raw",) # Requests MZ definition variables: GENDER_1 & GENDER_2
		 mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data_GENDER_1"), name="GENDER_1"), # Selects GENDER_1 from mzDefs
         mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data_GENDER_2"), name="GENDER_2"), # Selects GENDER_2 from mzDefs
#1X1 MATRIX OF THE TWINS SEX ABOVE FOR TWIN 1S SEX AND TWIN 2S SEX
         # Classic Mx means model=	(F * (M+(sex_t1@Betas)))'  | (F * (M+(sex_t2@Betas)))' 
		mxAlgebra(expression=cbind( t(ACE.F %*% ( ACE.Mean + (GENDER_1 %x% ACE.Beta))),
										t(ACE.F %*% ( ACE.Mean + (GENDER_2 %x% ACE.Beta))) ), name="expMean"),
	    mxFIMLObjective( covariance="ACE.expCovMZ", means="expMean", dimnames=selVars ) ),
#ABOVE SPECIFY THE COVARIANCE MZ MATRIX AND EXPECTED MEANS, THEN REPEAT THIS BELOW FOR THE DZ TWINS

    # Algebra for expected Int & Slope means + sex effects (betas) for DZs
		mxModel("DZ", 
		 mxData(data.frame(dzData,dzDefs), type="raw" ), # Requests DZ definition variables: GENDER_1 & GENDER_1
		 mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data_GENDER_1"), name="GENDER_1"), # Selects only GENDER_1 from mzDefs
         mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, labels=c("data_GENDER_2"), name="GENDER_2"), # Selects only GENDER_2 from mzDefs
	     mxAlgebra(expression=cbind( t(ACE.F %*% ( ACE.Mean + (GENDER_1 %x% ACE.Beta))),
										t(ACE.F %*% ( ACE.Mean + (GENDER_2 %x% ACE.Beta))) ), name="expMean"),
	    mxFIMLObjective( covariance="ACE.expCovDZ", means="expMean", dimnames=selVars ) ),

        mxAlgebra( expression=MZ.objective + DZ.objective, name="sumll" ),
	    mxAlgebraObjective("sumll")
)

lgcACEFit <- mxRun(lgcACEModel) 		# Run Latent Growth Curve ACE model
lgcACESumm <- summary(lgcACEFit)
lgcACESumm

parameterSpecifications(lgcACEFit)
expectedMeansCovariances(lgcACEFit)
tableFitStatistics(lgcACEFit)