fakeData <- function(dataset, digits=2, n=NA, 
  use.names=TRUE, use.levels=TRUE, use.miss=TRUE,
  mvt.method="eigen", het.ML=FALSE, het.suppress=TRUE){

  # This function takes as argument an existing dataset, which 
  # must be either a matrix or a data frame. Each column of the 
  # dataset must consist either of numeric variables or ordered 
  # factors. When one or more ordered factors are included, 
  # then a heterogeneous correlation matrix is computed using 
  # John Fox' polycor package. Pairwise complete observations 
  # are used for all covariances, and the exact pattern of 
  # missing data present in the input is placed in the output,
  # provided a new sample size is not requested. Warnings from
  # the hetcor function are suppressed.

  # Author:   Ryne Estabrook
  # Created:  17 Aug 2010

  require(mvtnorm)
  require(polycor)

  # requires data frame or matrix
  if((is.data.frame(dataset)+is.matrix(dataset))==0){
    warning("Data must be a data frame or matrix")
  }

  # organization
  row <- dim(dataset)[1] # number of rows
  if(is.na(n))(n <- row) # sets unspecified sample size to num rows
  col <- dim(dataset)[2] # number of columns
  del <- is.na(dataset)  # records position of NAs in dataset
  if(n!=row){
    select <- round(runif(n, 0.5, row+.49),0)
    del <- del[select,]
  }
  num <- rep(NA, col)    # see what's not a factor
  ord <- rep(NA, col)    # see what's an ordered factor

  # which columns are numeric (the others are factors)?
  for (i in 1:col){
    num[i] <- is.numeric(dataset[,i])
    ord[i] <- is.ordered(dataset[,i])
  }

  # check for unordered factors
  location <- !(num|ord)
  unorder <- sum(location)

  if(unorder>0)warning(
    paste("Unordered factor detected in variable(s):", 
      names(dataset)[location]
    )
  )

  # if everything is numeric, don't invoke polycor
  if(sum(!num)==0){
    # generate data with rmvnorm
    fake <- rmvnorm(n, 
      apply(dataset, 2, mean, na.rm=TRUE),
      cov(dataset, use="pairwise.complete.obs"),
      mvt.method)

    # round the data to the requested digits
    fake <- round(fake, digits)

    # insert the missing data, if so requested
    if(use.miss==TRUE)(fake[del] <- NA)

    # give the variables names, if so requested
    if(use.names==TRUE)(names(fake) <- names(dataset))

    # return the new data
    return(fake)
  }

  # if there are factors, we start here

  # find the variable means (constrain to zero for factors)
  mixedMeans <- rep(0, col)
  mixedMeans[cat] <- apply(dataset[,num], 2, mean, na.rm=TRUE)

  # estimate a heterogeneous correlation matrix
  if (het.suppress==TRUE){
    suppressWarnings(het <- hetcor(dataset, ML=het.ML))
  } else (het <- hetcor(dataset, ML=het.ML))
  mixedCov <- het$correlations

  # make a diagonal matrix of standard deviations to turn the 
  # correlation matrix into a covariance matrix
  stand <- matrix(0, col, col)
  diag(stand) <- rep(1, col)
  diag(stand)[num] <- apply(dataset[,num], 2, sd, na.rm=TRUE)
  # pre and post multiply hetero cor matrix by diagonal sd matrix
  mixedCov <- stand %*% mixedCov %*% stand

  # generate the data
  fake <- as.data.frame(rmvnorm(row, mixedMeans, mixedCov, mvt.method))

  # insert the missing data, if so requested
  if(use.miss==TRUE)(fake[del] <- NA)

  # turn the required continuous variables into factors
  for (i in (1:col)[!num]){
    # the original data for this column
    old <- dataset[,i]
   
    # the new data for this column, omiting NAs
    new <- fake[!is.na(fake[,i]),i]

    # what are the levels of the original factor?
    lev <- levels(old)

    # establish cutpoints in new variable from cdf of old factor
    cut <- cumsum(table(old))/(sum(!is.na(old)))

    # put continuous variable into a matrix, repeating value across columns
    wide <- matrix(new, length(new), length(lev))

    # put the cutpoints in a matrix, repeating the cut point values across rows
    crit <- matrix(quantile(new, cut), length(new), length(lev), byrow=TRUE)

    # for each value (row of the wide matrix), 
    # how many cutpoints is the value greater than?
    # number of cutpoints surpassed=category
    fake[!is.na(fake[,i]),i] <- apply(wide>crit, 1, sum)

    # make it a factor
    fake[,i] <- factor(fake[,i], ordered=TRUE)

    # give the new factor the same levels as the old variable
    if(length(levels(fake[,i]))!=length(lev))message(
      paste("Fewer categories in simulated variable", 
      names(fake)[i], "than in input variable", names(dataset)[i]))
    if(use.levels==TRUE&(length(levels(fake[,i]))==length(lev))){
      levels(fake[,i]) <- lev} else (levels(fake[,i]) <- 1:length(lev))
  }

  # round the data to the requested digits
  fake[,num] <- round(fake[,num], digits)

  # give the variables names, if so requested
  if(use.names==TRUE)(names(fake) <- names(dataset))
  
  # return the new data
  return(fake)
}