# 
# Latent class analysis script
# Authors: Shaunna Clark, Michael Neale
# Based on: forum post on OpenMx website
# Date: 25 October 2012
#
# To do: make it work for Ordinal with more than 2 categories
#        make it work with continuous variables or mixture of continuous & ordinal
#        take over the world.
#

getwd()
require(OpenMx)

# Read Example Data
data <- read.table("http://www.vipbg.vcu.edu/NIDAworkshop2012/Data_Oz_CAN_Sim.dat",na.string="NA", header=TRUE)
dim(data)
names(data)

# The first 3 variables are not being analyzed
offset <- 3
nvar <- 12
nthresh <- 1

# Declare number of classes
nclass <- 4

# Declare number of random starts
nrandom <- 3

LCAfun <- function(data, nclass=1, nrandom=1, nbest=NA)
{
	# if there's no argument for nbest, we don't preselect the starting values based on how well they fit.

	if (is.na(nbest))
	{
		nbest <- nrandom
		trialsWithFixed <- FALSE
	}
	else
	{
			if(nbest>nrandom)
			{
				warning(paste('nbest argument',nbest, 'is greater than nrandom argument', nrandom, 'so only',nrandom,' starts will be used'))
				trialsWithFixed <- FALSE
			}
			else
			{
				trialsWithFixed <- TRUE
			}
	}
nvar <- dim(data)[[2]]
# Order category the variables - currently only binary....
for(i in 1:nvar) {
	data[,i] <- as.ordered(data[,i])
	}

nameList <- names(data)

# Make a template model for an item

itemModel <-    mxModel("Item 1", 
                mxMatrix("Iden", name = "R", nrow = 1, ncol = 1, free=FALSE),
                mxMatrix("Full", name = "M", nrow = 1, ncol = 1, free=FALSE),
                mxFIMLObjective(covariance="R", means="M", dimnames=nameList[1], 
            	    thresholds="ThresholdsClass1[1,]",vector=TRUE))
            	    
# Build a list of nvar * nclass models, gathering nvar of them into nclass mxModels
modelList <- vector("list",(nvar*nclass))
modelNames <- vector("list",(nvar*nclass))
classModelList <- vector("list",nclass)
classModelNames <- vector("list",nclass)

for(i in 1:nclass) 
{
    for(j in 1:nvar)
    {
    temp <- itemModel
    temp@name <- paste("Class", i, "Item", j, sep="_")
    temp <- mxModel(temp, 
        mxMatrix("Full", 
            name = paste("ThresholdClass",i,"Item",j,sep="_"),
            nrow = 1, 
            ncol = 1,
            values = runif(1), 
            dimnames = list("Threshold",nameList[j]),
            labels = paste("threshold",i,j,sep="_"),
#            lbound=-10, ubound=4, 
            free=TRUE)
            )
	temp <- mxModel(temp, mxData(observed=data, type="raw"))
	tempVar <- nameList[j]
    temp$objective <- mxFIMLObjective(covariance="R", means="M", dimnames=tempVar, 
	    thresholds=paste("ThresholdClass",i,"Item",j,sep="_"),vector=TRUE)
#    print(c("Pasting",i,j,(i-1)*nvar+j))
    modelList[(i-1)*nvar+j] <- temp
    modelNames[(i-1)*nvar+j] <- paste("Class", i, "Item", j, sep="_")
    }
        # Build up objective function for this class using generous doses of paste
        objectives <- paste(modelNames[((i-1)*nvar+1):(i*nvar)], 'objective', sep=".")
        modelnumbers <- ((i-1)*nvar+1):(i*nvar)
        itemObjectives <- objectives
        itemObjectivesProd <- paste(itemObjectives, collapse = " * ")
#        algebraString <- paste("mxAlgebra(-2*sum(log(", itemObjectivesProd, ")), name='itemsObj')", sep = "")
        algebraString <- paste("mxAlgebra(", itemObjectivesProd, ", name='itemsObj')", sep = "")
#        algebraString <- paste("mxAlgebra(exp(sum(log(", itemObjectivesProd, "))), name='itemsObj')", sep = "")
#        algebraString <- paste("mxAlgebra(((", itemObjectivesProd, "))), name='itemsObj')", sep = "")
        algebraObjective <- eval(parse(text=algebraString)[[1]])
#        obj <- mxAlgebraObjective("itemsObj",vector=TRUE)
        # Gather item models
        classModelList[i] <- mxModel(name=paste("Class",i,sep="_"), modelList[modelnumbers], algebraObjective) 
        classModelNames[i] <- paste("Class", i, sep="_")
}

# Create class membership probabilities, constrain them to be in the range 0-1, and make their sum equal 1.0
ClassMembershipValuesMat    <-  mxMatrix("Full", name = "ClassMembershipValues", nrow = nclass, ncol = 1, free=c(FALSE,rep(TRUE,nclass-1)),
                                values=c(1,runif(nclass-1)), labels = c(paste("pclass", 1:nclass, sep="")), lbound=0.0001)
ClassMembershipProbabilitiesAlg <-  mxAlgebra(ClassMembershipValues %x% (1/sum(ClassMembershipValues)), name="ClassMembershipProbabilities")
# Define the objective function
objectives <- paste(classModelNames, 'itemsObj', sep=".")
modelnumbers <- 1:nclass
components <- paste("ClassMembershipProbabilities[", modelnumbers, ",1]", " %x% ", objectives, sep = '')
componentsSum <- paste(components, collapse = " + ")
algebraString <- paste("mxAlgebra(-2*sum(log(", componentsSum, ")), name='mixtureObj')", sep = "")
algebraObjective <- eval(parse(text=algebraString)[[1]])
obj <- mxAlgebraObjective("mixtureObj")

lcamodel <- mxModel(paste("LCA_model",nclass,"classes",sep="_"), classModelList, ClassMembershipValuesMat, ClassMembershipProbabilitiesAlg, algebraObjective, obj)
# Make a model with a suitable nclass name "LCA_model_3_classes"

#assign(lcamodel@name,lcamodel)

npars <- length(omxGetParameters(lcamodel))
estimateList <- matrix(runif(nrandom*npars),nrandom,npars)
funList <- vector(mode='numeric',nrandom)
 # Only do this if nbest is not NA
# browser()
if(trialsWithFixed)
 {
 	# Seek a decent starting point by evaluating likelihood with fixed random values for all parameters 
 	minFun <- Inf
 	for (i in 1:nrandom) 
 	{
 			lcamodelFixed <- omxSetParameters(lcamodel, names(omxGetParameters(lcamodel)), values=estimateList[i,])
 			lcamodelFixed <- omxSetParameters(lcamodelFixed, names(omxGetParameters(lcamodelFixed)), free=F)
 			lcamodelRun <- mxRun(lcamodelFixed, unsafe=T)
 			#  This doesn't work due to bug when all parameters are fixed (returns NA for minimum)
 			#		funList[i] <- ifelse(is.na(lcamodelRun@output$minimum),Inf,modelList[[i]]@output$minimum)
 			funList[i] <- ifelse(is.na(lcamodelRun$mixtureObj@result),Inf,lcamodelRun$mixtureObj@result) 
 		    if (funList[i] < minFun) {minFun <- funList[i]; minNum <- i}
 	 }
 # Sort the parameter estimate vectors according to best of the funList's
 estimateListSorted <- estimateList[order(funList),]
} 
else
	{estimateListSorted <- estimateList} 
 # Run the job using the nbest of the vectors of starting values
 modelList <- vector(mode='list',nbest)
 funList <- vector(mode='numeric',nbest)
 minFun <- Inf
 minNum <- NA
 for (i in 1:nbest) 
 {
 	lcamodel <- omxSetParameters(lcamodel, names(omxGetParameters(lcamodel)), values=estimateListSorted[i])
 	modelList[i] <- mxRun(lcamodel, unsafe=T)
 	funList[i] <- ifelse(is.na(modelList[[i]]@output$minimum),Inf,modelList[[i]]@output$minimum)
 	if(funList[i] < minFun) {minFun <- funList[i]; minNum <- i}
 }
 
if (is.na(minNum)) {stop(" Error: no proper solutions found")}
model2 <- modelList[[minNum]]

#summary(model2)
#table(funList)

# model@matrices
# save.image("LCA_2Classes.Rdata")
# 
##Making LCA Profile Plot

#Load Rdata file.
# load("LCA_2Classes.Rdata")

#Pulling out thresholds

classTease <- matrix(0,nclass,nvar)
classPeas <- matrix(0,nclass,nvar)
classProbs <- matrix(0,1,nclass)

for (i in 1:nclass) {
classTease[i,] <- model2@output$estimate[((i-1)*nvar + nclass):((i*nvar) + nclass - 1)]
#Converting threshold to probabilities
classPeas[i,] <- (1-pnorm(classTease[i,]))
classProbs[1,i] <- model2$ClassMembershipProbabilities@result[i,1]
}

#Plotting Probabilities
#colors <- rainbow(nclass, s = 1, v = 1, start = 0, end = max(1,n - 1)/n, alpha = 1)
palette("default")
plotcolors <- palette()
par(mai=c(1.8, .5, .5, 0.5)) # enlarged right margin

print(plot(classPeas[1,], type="o", col=plotcolors[1],ylim=c(0,1),axes=FALSE, ann=FALSE))

axis(1,at=1:nvar,lab=nameList)
axis(2,las=1,at=c(0,0.2,0.4,0.6,0.8,1))
box()
classLegend <- vector('character',nclass)
classLegend[1] <- paste("Class ",1,round(classProbs[1,1],3))
for (i in 1:(nclass-1)) {
lines(classPeas[i+1,],type="o", pch=22, lty=2, col=plotcolors[i+1])
classLegend[i+1] <- paste("Class ",i+1,round(classProbs[1,i+1],3))
}

title(main=(paste("LCA ",nclass," Class Profile Plot, -2lnL=",round(model2@output$minimum,2))), col.main="black",font.main=4)
title(xlab="DSM Items", col.lab="black")
title(ylab="Probability", col.lab="black")

#legend("right", legend=c("one", "two"), inset=-0.2, xpd=NA)
legend(1,-.2,classLegend, cex=0.8, col=plotcolors,pch=21:22,lty=1:2,xpd=NA)


##Calculating AIC and BIC
LL_2c <- model2@output$Minus2LogLikelihood
#LL_2c
nsam = dim(data)[1]
#parameters
npar <- (nclass-1) + (nthresh*nvar*nclass)
#npar

AIC_2c = 2*npar + LL_2c 
#AIC_2c
BIC_2c = LL_2c + (npar*log(nsam))
#BIC_2c

return(list(modelFit=model2, fits=funList))
}

LCA2 <- LCAfun(data[4:15], 2, 5, 3)
LCA3 <- LCAfun(data[4:15], 3, 10, 2)
# LCA4 <- LCAfun(data[4:15], 4, 3)
# LCA5 <- LCAfun(data[4:15], 5, 3)
# LCA6 <- LCAfun(data[4:15], 6, 3)
# LCA7 <- LCAfun(data[4:15], 7, 3)
# LCA7b <- LCAfun(data[4:15], 7, 8)
#