# # Latent class analysis script # Authors: Shaunna Clark, Michael Neale # Based on: forum post on OpenMx website # Date: 25 October 2012 # # To do: make it work for Ordinal with more than 2 categories # make it work with continuous variables or mixture of continuous & ordinal # take over the world. # getwd() require(OpenMx) # Read Example Data data <- read.table("http://www.vipbg.vcu.edu/NIDAworkshop2012/Data_Oz_CAN_Sim.dat",na.string="NA", header=TRUE) dim(data) names(data) # The first 3 variables are not being analyzed offset <- 3 nvar <- 12 nthresh <- 1 # Declare number of classes nclass <- 4 # Declare number of random starts nrandom <- 3 LCAfun <- function(data, nclass=1, nrandom=1, nbest=NA) { # if there's no argument for nbest, we don't preselect the starting values based on how well they fit. if (is.na(nbest)) { nbest <- nrandom trialsWithFixed <- FALSE } else { if(nbest>nrandom) { warning(paste('nbest argument',nbest, 'is greater than nrandom argument', nrandom, 'so only',nrandom,' starts will be used')) trialsWithFixed <- FALSE } else { trialsWithFixed <- TRUE } } nvar <- dim(data)[[2]] # Order category the variables - currently only binary.... for(i in 1:nvar) { data[,i] <- as.ordered(data[,i]) } nameList <- names(data) # Make a template model for an item itemModel <- mxModel("Item 1", mxMatrix("Iden", name = "R", nrow = 1, ncol = 1, free=FALSE), mxMatrix("Full", name = "M", nrow = 1, ncol = 1, free=FALSE), mxFIMLObjective(covariance="R", means="M", dimnames=nameList[1], thresholds="ThresholdsClass1[1,]",vector=TRUE)) # Build a list of nvar * nclass models, gathering nvar of them into nclass mxModels modelList <- vector("list",(nvar*nclass)) modelNames <- vector("list",(nvar*nclass)) classModelList <- vector("list",nclass) classModelNames <- vector("list",nclass) for(i in 1:nclass) { for(j in 1:nvar) { temp <- itemModel temp@name <- paste("Class", i, "Item", j, sep="_") temp <- mxModel(temp, mxMatrix("Full", name = paste("ThresholdClass",i,"Item",j,sep="_"), nrow = 1, ncol = 1, values = runif(1), dimnames = list("Threshold",nameList[j]), labels = paste("threshold",i,j,sep="_"), # lbound=-10, ubound=4, free=TRUE) ) temp <- mxModel(temp, mxData(observed=data, type="raw")) tempVar <- nameList[j] temp$objective <- mxFIMLObjective(covariance="R", means="M", dimnames=tempVar, thresholds=paste("ThresholdClass",i,"Item",j,sep="_"),vector=TRUE) # print(c("Pasting",i,j,(i-1)*nvar+j)) modelList[(i-1)*nvar+j] <- temp modelNames[(i-1)*nvar+j] <- paste("Class", i, "Item", j, sep="_") } # Build up objective function for this class using generous doses of paste objectives <- paste(modelNames[((i-1)*nvar+1):(i*nvar)], 'objective', sep=".") modelnumbers <- ((i-1)*nvar+1):(i*nvar) itemObjectives <- objectives itemObjectivesProd <- paste(itemObjectives, collapse = " * ") # algebraString <- paste("mxAlgebra(-2*sum(log(", itemObjectivesProd, ")), name='itemsObj')", sep = "") algebraString <- paste("mxAlgebra(", itemObjectivesProd, ", name='itemsObj')", sep = "") # algebraString <- paste("mxAlgebra(exp(sum(log(", itemObjectivesProd, "))), name='itemsObj')", sep = "") # algebraString <- paste("mxAlgebra(((", itemObjectivesProd, "))), name='itemsObj')", sep = "") algebraObjective <- eval(parse(text=algebraString)[[1]]) # obj <- mxAlgebraObjective("itemsObj",vector=TRUE) # Gather item models classModelList[i] <- mxModel(name=paste("Class",i,sep="_"), modelList[modelnumbers], algebraObjective) classModelNames[i] <- paste("Class", i, sep="_") } # Create class membership probabilities, constrain them to be in the range 0-1, and make their sum equal 1.0 ClassMembershipValuesMat <- mxMatrix("Full", name = "ClassMembershipValues", nrow = nclass, ncol = 1, free=c(FALSE,rep(TRUE,nclass-1)), values=c(1,runif(nclass-1)), labels = c(paste("pclass", 1:nclass, sep="")), lbound=0.0001) ClassMembershipProbabilitiesAlg <- mxAlgebra(ClassMembershipValues %x% (1/sum(ClassMembershipValues)), name="ClassMembershipProbabilities") # Define the objective function objectives <- paste(classModelNames, 'itemsObj', sep=".") modelnumbers <- 1:nclass components <- paste("ClassMembershipProbabilities[", modelnumbers, ",1]", " %x% ", objectives, sep = '') componentsSum <- paste(components, collapse = " + ") algebraString <- paste("mxAlgebra(-2*sum(log(", componentsSum, ")), name='mixtureObj')", sep = "") algebraObjective <- eval(parse(text=algebraString)[[1]]) obj <- mxAlgebraObjective("mixtureObj") lcamodel <- mxModel(paste("LCA_model",nclass,"classes",sep="_"), classModelList, ClassMembershipValuesMat, ClassMembershipProbabilitiesAlg, algebraObjective, obj) # Make a model with a suitable nclass name "LCA_model_3_classes" #assign(lcamodel@name,lcamodel) npars <- length(omxGetParameters(lcamodel)) estimateList <- matrix(runif(nrandom*npars),nrandom,npars) funList <- vector(mode='numeric',nrandom) # Only do this if nbest is not NA # browser() if(trialsWithFixed) { # Seek a decent starting point by evaluating likelihood with fixed random values for all parameters minFun <- Inf for (i in 1:nrandom) { lcamodelFixed <- omxSetParameters(lcamodel, names(omxGetParameters(lcamodel)), values=estimateList[i,]) lcamodelFixed <- omxSetParameters(lcamodelFixed, names(omxGetParameters(lcamodelFixed)), free=F) lcamodelRun <- mxRun(lcamodelFixed, unsafe=T) # This doesn't work due to bug when all parameters are fixed (returns NA for minimum) # funList[i] <- ifelse(is.na(lcamodelRun@output$minimum),Inf,modelList[[i]]@output$minimum) funList[i] <- ifelse(is.na(lcamodelRun$mixtureObj@result),Inf,lcamodelRun$mixtureObj@result) if (funList[i] < minFun) {minFun <- funList[i]; minNum <- i} } # Sort the parameter estimate vectors according to best of the funList's estimateListSorted <- estimateList[order(funList),] } else {estimateListSorted <- estimateList} # Run the job using the nbest of the vectors of starting values modelList <- vector(mode='list',nbest) funList <- vector(mode='numeric',nbest) minFun <- Inf minNum <- NA for (i in 1:nbest) { lcamodel <- omxSetParameters(lcamodel, names(omxGetParameters(lcamodel)), values=estimateListSorted[i]) modelList[i] <- mxRun(lcamodel, unsafe=T) funList[i] <- ifelse(is.na(modelList[[i]]@output$minimum),Inf,modelList[[i]]@output$minimum) if(funList[i] < minFun) {minFun <- funList[i]; minNum <- i} } if (is.na(minNum)) {stop(" Error: no proper solutions found")} model2 <- modelList[[minNum]] #summary(model2) #table(funList) # model@matrices # save.image("LCA_2Classes.Rdata") # ##Making LCA Profile Plot #Load Rdata file. # load("LCA_2Classes.Rdata") #Pulling out thresholds classTease <- matrix(0,nclass,nvar) classPeas <- matrix(0,nclass,nvar) classProbs <- matrix(0,1,nclass) for (i in 1:nclass) { classTease[i,] <- model2@output$estimate[((i-1)*nvar + nclass):((i*nvar) + nclass - 1)] #Converting threshold to probabilities classPeas[i,] <- (1-pnorm(classTease[i,])) classProbs[1,i] <- model2$ClassMembershipProbabilities@result[i,1] } #Plotting Probabilities #colors <- rainbow(nclass, s = 1, v = 1, start = 0, end = max(1,n - 1)/n, alpha = 1) palette("default") plotcolors <- palette() par(mai=c(1.8, .5, .5, 0.5)) # enlarged right margin print(plot(classPeas[1,], type="o", col=plotcolors[1],ylim=c(0,1),axes=FALSE, ann=FALSE)) axis(1,at=1:nvar,lab=nameList) axis(2,las=1,at=c(0,0.2,0.4,0.6,0.8,1)) box() classLegend <- vector('character',nclass) classLegend[1] <- paste("Class ",1,round(classProbs[1,1],3)) for (i in 1:(nclass-1)) { lines(classPeas[i+1,],type="o", pch=22, lty=2, col=plotcolors[i+1]) classLegend[i+1] <- paste("Class ",i+1,round(classProbs[1,i+1],3)) } title(main=(paste("LCA ",nclass," Class Profile Plot, -2lnL=",round(model2@output$minimum,2))), col.main="black",font.main=4) title(xlab="DSM Items", col.lab="black") title(ylab="Probability", col.lab="black") #legend("right", legend=c("one", "two"), inset=-0.2, xpd=NA) legend(1,-.2,classLegend, cex=0.8, col=plotcolors,pch=21:22,lty=1:2,xpd=NA) ##Calculating AIC and BIC LL_2c <- model2@output$Minus2LogLikelihood #LL_2c nsam = dim(data)[1] #parameters npar <- (nclass-1) + (nthresh*nvar*nclass) #npar AIC_2c = 2*npar + LL_2c #AIC_2c BIC_2c = LL_2c + (npar*log(nsam)) #BIC_2c return(list(modelFit=model2, fits=funList)) } LCA2 <- LCAfun(data[4:15], 2, 5, 3) LCA3 <- LCAfun(data[4:15], 3, 10, 2) # LCA4 <- LCAfun(data[4:15], 4, 3) # LCA5 <- LCAfun(data[4:15], 5, 3) # LCA6 <- LCAfun(data[4:15], 6, 3) # LCA7 <- LCAfun(data[4:15], 7, 3) # LCA7b <- LCAfun(data[4:15], 7, 8) #