## Replicating a dataset- script by R. Estabrook ## modified (slightly) by JE fakefac<-function(testAdhd2, digits=5, n=NA, use.names=TRUE, use.levels=TRUE, use.miss=TRUE, mvt.method="eigen", het.ML=FALSE, het.suppress=TRUE){ mixedMeans<-rep(0,col) mixedMeans[num]<-apply(testAdhd2[,num], 2, mean, na.rm=TRUE) ## estimating an heterogeneous correlation matrix require(mvtnorm) require(polycor) if(het.suppress==TRUE) {suppressWarnings(het<-hetcor(testAdhd2, ML=het.ML)) } else(het<-hetcor(testAdhd2, ML=het.ML)) mixedCov<-het$correlations ##now I am making a diagonal matrix of SD to turn the ## correlation matrix into a covariance matrix stand<-matrix(0, col, col) diag(stand)<-rep(1, col) diag(stand)[num]<-apply(testAdhd2[,num], 2, sd, na.rm=TRUE) ## pre and post multiplying hetero cor matrix by diagonal sd matrix mixedCov<-stand %*% mixedCov %*% stand ## now generating data fakefac<-as.data.frame(rmvnorm(row, mixedMeans, mixedCov, mvt.method)) ## inserting missing data if so requested if(use.miss==TRUE)(fakefac[del]<-NA) ## turn the required continuous variables into factors for(i in (1:col)[!num]){ # the orginal data for this column old<-testAdhd2[,i] ## the new data for this column, omiting NA's new<-fakefac[!is.na(fakefac[,i]), i] ## what are the levels of the original factor lev<-levels(old) ## establish cutpoints in new variables from cdf of old factor cut<-cumsum(table(old))/(sum(!is.na(old))) ## putting continuous variable into a matrix, repeating value across columns wide<-matrix(new, length(new), length(lev)) ## putting the cutpoints in a matrix repeating the cut point values across rows crit<-matrix(quantile(new, cut), length(new), length(lev), byrow=TRUE) # for each value (row of the wide matrix), # how many cutpoints is the value greater than? # number of cutpoints surpassed category fakefac[!is.na(fakefac[,i]),i]<-apply(wide > crit, 1, sum) ## make it a factor fakefac[,i]<-factor(fakefac[,i], ordered=TRUE) ## giving the new factor the same levels as the old variables if(length(levels(fakefac[,i]))!=length(lev))message( paste("Fewer categories in simulated variable", names(fakefac)[i], "than in inout variable", names(testAdhd2)[i])) if(use.levels==TRUE&(length(levels(fakefac[,i]))==length(lev))){ levels(fakefac[,i])<-lev} else (levels(fakefac[,i])<- 1:length(lev)) } ## now rounding the data to the requested digits fakefac[,num]<-round(fakefac[, num], 2) ## give the variables names, if so requested if(use.names==TRUE)(names(fakefac)<-names(testAdhd2)) ## returning new data return(fakefac) }