### This script is divided in two parts.
# PART 1 is MZ versus DZ twins intra-class correlations for tvaluesinfParietalLobuleRight variable
# PART 2 is ACE Univariate twin modelling for the e.g. tvaluesinfParietalLobuleRight variable
## Choose ACE if DZ intra-class correlation is less than half the MZ intra-class correlation 

#### PART 1 ####

################################################
##        Twin-Based Introduction to R        ##
##      First, Get MZ & DZ correlations!      ##
## Script provided by Elizabeth Prom-Wormley  ##
##                In August 2019              ##
##    Slightly edited by: Arthur Montalto     ##
##                                            ##
################################################

## This link provides some good introductory explanations to 
## compare base R graphics 
## and ggplot2 graphics which you will be learning later in the course.
## http://seriousstats.wordpress.com/2012/09/05/highqualitygraphs/
## https://www.datanovia.com/en/blog/top-r-color-palettes-to-know-for-great-data-visualization/#viridis-color-palettes

# Code to install packAges.  You only need to do this once.
#install.packAges('psych', dependencies=TRUE)
#install.packAges('gplots', dependencies=TRUE)
#install.packAges('car', dependencies=TRUE)
#install.packAges('descr', dependencies=TRUE)
#install.packAges('gmodels', dependencies=TRUE)
#install.packAges("RColorBrewer")

# Code to run packAges in a given session. 
# You need to do this every time you have a new R session.
library(OpenMx)
library(psych)
library(car)
library(gmodels)
library(descr)
library(gplots)
library(RColorBrewer)

# Setting working directory. Your address will be different.
setwd("V:/TWIN-E/arthurCopies")
## Trauma exposed sub-sample ##
#Data <- read.csv('dataMultivariate.csv')

#setwd("E:/Brisbane/Twin Workshop 2019/Elizabeth")
#Data<- read.table(file='tw19wide_v3.txt',header=T,sep="\t")

## Trauma and non-trauma exposed sub-samples ##
Data <- read.csv('dataMultivariateTraumaNonTrauma.csv')
#Data<-Data[!Data$family_ID==518,] # This one is outlier in some variables 

## WORKING WITH TWIN DATA ##
## Let's start getting a feel for the data
dim(Data)
names(Data)
head(Data)

## How many MZ & DZ (MZ=0)
table(Data$Zygosity)

## Dividing dataset in MZ dataset and DZ dataset
MZ<-subset(Data,Data$Zygosity==0)
DZ<-subset(Data,Data$Zygosity==1)

# Scatterplot of twin relationships #
par(mfcol=c(2,2))
plot(MZ$tvaluesinfParietalLobuleRight_1,MZ$tvaluesinfParietalLobuleRight_2, col="red")
plot(DZ$tvaluesinfParietalLobuleRight_1,DZ$tvaluesinfParietalLobuleRight_2, col="black")
plot(MZ$Age_1,MZ$Age_2, col="red")
plot(DZ$Age_1,DZ$Age_2, col="blue")
par(mfrow=c(1,1))
# Adding a loess curve 
#par(mfcol=c(1,1))
#pairs(MZ[c(4,29)],panel=panel.smooth, main="Scatterplot of MZ pairs")
#pairs(DZ[c(4,29)],panel=panel.smooth, main="Scatterplot of DZ pairs")


##############################
# Graphing Twin Correlations #
##############################

# Calculating Twin Correlations by Zygosityosity #

### MZ correlation
rMZ<-cor(MZ$tvaluesinfParietalLobuleRight_1,MZ$tvaluesinfParietalLobuleRight_2,use="pairwise.complete.obs")
## With confidance intervales to be reported in paper
rMZ_conf<-cor.test(MZ$tvaluesinfParietalLobuleRight_1,MZ$tvaluesinfParietalLobuleRight_2,use="pairwise.complete.obs")
rMZ_conf
### DZ correlation
rDZ<-cor(DZ$tvaluesinfParietalLobuleRight_1,DZ$tvaluesinfParietalLobuleRight_2,use="pairwise.complete.obs")
## With confidance intervale to be reported in paper
rDZ_conf<-cor.test(DZ$tvaluesinfParietalLobuleRight_1,DZ$tvaluesinfParietalLobuleRight_2,use="pairwise.complete.obs")
rDZ_conf
# Combining results into one object #
Corr2<-cbind(rMZ, rDZ)
write.table(Corr2,file="corrs.csv",sep=",",row.names=FALSE,col.names=FALSE)

#setup overall background to make transfer to slides prettier
par(bg="white")

#making barplot of twin correlations and putting a box around figure
barplot(Corr2,ylim=c(0,1),col=c("cyan"),
        cex.axis=1,cex.names=1,space=0.1)

# Same barplot with a color palette other than Base R
# via the RColorBrewer packAge.  See below for more 
# code to get started with RColorBrewer
barplot(Corr2[c(1,2)],ylim=c(0,1),
        col=c("#1D91C0","#41B6C4"),
        cex.axis=1,cex.names=1,space=0.1)
legend("topright",legend=c("rMZ", "rDZ"), pch=15, 
       pt.cex=1.5,col=c("#1D91C0","#41B6C4"))

box()


#### PART 2 ####


####################################################################
# Program: UnivarACEcontDirSymm.R  
#  Author: Elizabeth Prom-Wormley
#    Date: August 28, 2019 
# Edited by: Miranda Chilver
#           Arthur Montalto
# Twin Univariate ACE model to estimate causes of variation across 
# multiple groups. Direct symmetric.
# Matrix style model - Raw data - Continuous data
# ###################################################################

# Load Libraries & Options
#library(OpenMx)
#library(psych)
#install.packAges("magrittr") # packAge installations are only needed the first time you use it
#install.packAges("dplyr")    # alternative installation of the %>%
library(magrittr) # needs to be run every time you start R and want to use %>%
library(dplyr)    # alternatively, this also loads %>%
mxOption(NULL,"Default optimizer","CSOLNP")
#setwd("E:/Brisbane/Twin Workshop 2019/Elizabeth")
# PREPARE DATA
#Data<- read.table(file='tw19wide_v3.txt',header=T,sep="\t")

describe(Data)

# Select Variables for Analysis
vars      <- 'tvaluesinfParietalLobuleRight'                  # list of variables names
nv        <- 1                         # number of variables
ntv       <- nv*2                      # number of total variables
selVars   <- paste(vars,"_",c(rep(1,nv),rep(2,nv)),sep="")
covVars   <- c('Age_1','Gender_1','Gender_2')

# Select Data for Analysis
mzData    <- subset(Data, Zygosity==0, c(selVars, covVars))
dzData    <- subset(Data, Zygosity==1, c(selVars, covVars))
describe(mzData)
# Create Labels for means
labMeZ   <- c("mZ1","mZ2")          


# Check descriptives to pick better starting values
#var(mzData[,1:2])[1]/3
#var(dzData[,1:2])[1]


# PREPARE MODEL
# Create Matrices for Covariates and linear Regression Coefficients
defAge      <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, 
                         labels=c("data.Age_1"), name="Age" )
defGender1     <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, 
                         labels=c("data.Gender_1"), name="Gender1" )
defGender2     <- mxMatrix( type="Full", nrow=1, ncol=1, free=FALSE, 
                         labels=c("data.Gender_2"), name="Gender2" )
pathB1     <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, 
                        values=.01, label="beta1", name="b1" ) # Age effect
pathB2     <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, 
                        values=.01, label="beta2", name="b2" ) # Gender effect
pathB3     <- mxMatrix( type="Full", nrow=1, ncol=1, free=TRUE, 
                        values=.01, label="beta3", name="b3" ) # Age x Gender effect

# Create Algebra for expected Mean Matrices
meanZ    <- mxMatrix( type="Full", nrow=1, ncol=ntv, free=TRUE, 
                      values=35, labels=labMeZ, name="meanZ" )
expMeanZ <- mxAlgebra( expression= meanZ + 
                         cbind(b1*Age,b1*Age) + 
                         cbind(b2*Gender1,b2*Gender2) + 
                         cbind(b3*Age*Gender1,b3*Age*Gender2), 
                       name="expMeanZ" )

# Create variance components objects
compA     <- mxMatrix( type="Lower", nrow=1, ncol=1, free=TRUE, 
                       labels="A11", values=5, name="VA" )
compC     <- mxMatrix( type="Lower", nrow=1, ncol=1, free=TRUE, 
                       labels="C11", values=5, name="VC" )
compE     <- mxMatrix( type="Lower", nrow=1, ncol=1, free=TRUE, 
                       labels="E11", values=5, name="VE" )
sumACE <- mxAlgebra(expression = VA+VC+VE, name = "VT")

# Specify expected var-covar matrices
covMZ     <- mxAlgebra( expression= rbind( cbind(VT,VA+VC), 
                                           cbind(VA+VC,VT)), 
                        name="expCovMZ" )
covDZ     <- mxAlgebra( expression= 
                          rbind( cbind(VT, 0.5%x%VA+VC), 
                                 cbind(0.5%x%VA+VC, VT)), 
                        name="expCovDZ" )

# Data objects for Multiple Groups
dataMZ    <- mxData( observed=mzData, type="raw" )
dataDZ    <- mxData( observed=dzData, type="raw" )

# Objective objects for Multiple Groups
expMZ     <- mxExpectationNormal( covariance="expCovMZ", 
                                  means="expMeanZ", dimnames=selVars )
expDZ     <- mxExpectationNormal( covariance="expCovDZ", 
                                  means="expMeanZ", dimnames=selVars )
funML     <- mxFitFunctionML()

# Create standardised estimates of var components (for CIs)
stA <- mxAlgebra(expression = VA/VT, name = "stVA")
stC <- mxAlgebra(expression = VC/VT, name = "stVC")
stE <- mxAlgebra(expression = VE/VT, name = "stVE")
# Confidence intervals
ci <- mxCI(c("stVA","stVC","stVE"))
cipars <- list(stA,stC,stE,ci)

# Combine Groups
defs      <- list(defAge, defGender1, defGender2)
pars      <- list(pathB1, pathB2, pathB3, compA, compC, compE, sumACE)

modelMZ   <- mxModel( name="MZ", pars, defs, meanZ, expMeanZ, 
                      covMZ, dataMZ, expMZ, funML, cipars )
modelDZ   <- mxModel( name="DZ", pars, defs, meanZ, expMeanZ, 
                      covDZ, dataDZ, expDZ, funML )
names(modelMZ)
multi     <- mxFitFunctionMultigroup( c("MZ","DZ") )

modelACE  <- mxModel( "ACE", modelMZ, modelDZ, multi )
names(multi)
# Run AiCiEi model
mxOption(NULL, "Default optimizer", "CSOLNP")
ACEFit    <- mxRun(modelACE, intervals = F)
summary(ACEFit)
names(ACEFit)

# Run AE model
AEmodel   <- mxModel( ACEFit, name="AEmodel" )
AEmodel   <- omxSetParameters( AEmodel, labels="C11", free=F, values=0 )
AEfit     <- mxRun( AEmodel, intervals=T )
summary(AEfit)


mxCompare(ACEFit, AEfit)


# Run CE model
CEmodel   <- mxModel( ACEFit, name="CEmodel" )
CEmodel   <- omxSetParameters( CEmodel, labels="A11", free=F, values=0 )
CEfit     <- mxRun( CEmodel, intervals=T )
summary(CEfit)
mxTryHard(CEmodel)
# Run E model
Emodel    <- mxModel( AEfit, name="Emodel" )
Emodel    <- omxSetParameters( Emodel, labels="A11", free=F, values=0 )
Efit      <- mxRun( Emodel, intervals=T )
summary(Efit)

# CIs in parallel
#require(snowfall)
#require(parallel)
#ncores <- detectCores()
#sfInit(parallel=TRUE, cpus=ncores )
#sfLibrary(OpenMx)

#ACEmFitCi    <- omxParallelCI(ACEFit)
#summary(ACEmFitCi)


# Model comparisons

Nested.fit <- rbind(mxCompare(ACEFit, AEfit) [2,],
                    
                    mxCompare(ACEFit, CEfit) [2,],
                    
                    mxCompare(AEfit, Efit) [2,],
                    
                    mxCompare(CEfit, Efit) [2,])

Nested.fit

ACENested <- list(AEfit,CEfit,Efit)
mxCompare(ACEFit,ACENested)         # A significant p-value means that the restricted (also called comparison (e.g. AE)) model 
# doesn't fit as well as the unrestricted (also called base (e.g. ACE)) model.
# Path estimates and variance components

ACE_estimates <- mxEval(cbind(MZ.stVA, MZ.stVC, MZ.stVE, MZ.VA, MZ.VC, MZ.VE),
                        
                        modelACE, compute = T)

AE_estimates  <- mxEval(cbind(MZ.stVA, MZ.stVC, MZ.stVE, MZ.VA, MZ.VC, MZ.VE),
                        
                        AEmodel, compute = T)

CE_estimates  <- mxEval(cbind(MZ.stVA, MZ.stVC, MZ.stVE, MZ.VA, MZ.VC, MZ.VE),
                        
                        CEmodel, compute = T)

E_estimates   <- mxEval(cbind(MZ.stVA, MZ.stVC, MZ.stVE, MZ.VA, MZ.VC, MZ.VE),
                        
                        Emodel, compute = T)



output <- rbind(ACE_estimates, AE_estimates, CE_estimates, E_estimates) %>% round(4)



rownames(output) <- c('modelACE', 'AEmodel', 'CEmodel', 'Emodel')

colnames(output) <- c('A', 'C', 'E', 'a', 'c', 'e')

### Finally, get the heritability estimates

output

### Good luck!