# ---------------------------------------------- # Program: Transformations1.R # Author: Steven M. Boker # Date: Sun Mar 21 10:00:48 EDT 2010 # # Examples of transformations # # ---------------------------------------------- # ---------------------------------------------- # Load required libraries library(lattice) # ---------------------------------------------- # Create four example distributions x1 <- rnorm(300, mean=0, sd=1) x2 <- rexp(300) x3 <- (1 / rnorm(300, mean=1, sd=.1)) x4 <- rnorm(300, mean=1, sd=.2)^4 xFrame <- data.frame(x=c(x1,x2,x3,x4), xtype=c(rep(1,300), rep(2,300), rep(3,300), rep(4,300) )) # ---------------------------------------------- # Boxplots for the distributions # Separate scales pdf("Trans1Box1.pdf", height=7, width=7) par(mfrow=c(2,2)) boxplot(x1, xlab="", main="x1", col="lightblue") boxplot(x2, xlab="", main="x2", col="lightblue") boxplot(x3, xlab="", main="x3", col="lightblue") boxplot(x4, xlab="", main="x4", col="lightblue") dev.off() # Same scale pdf("Trans1Box2.pdf", height=5, width=6) boxplot(list(x1, x2, x3, x4), names=c("x1", "x2", "x3", "x4"), col="lightblue") dev.off() # ---------------------------------------------- # Histograms for the distributions # Separate scales pdf("Trans1Hist1.pdf", height=7, width=7) par(mfrow=c(2,2)) hist(x1, xlab="", main="x1", col="lightblue") hist(x2, xlab="", main="x2", col="lightblue") hist(x3, xlab="", main="x3", col="lightblue") hist(x4, xlab="", main="x4", col="lightblue") dev.off() # Same scale pdf("Trans1Hist2.pdf", height=5, width=5) print(histogram(~ x | xtype, data=xFrame, layout=c(2,2), aspect=1, nint=20 )) dev.off() # ---------------------------------------------- # Quantile-Normal plots for the distributions pdf("Trans1QQNormX1.pdf", height=5, width=5) print(qqmath(~ x1, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="X1", cex=1.25), scales=list(cex=1.25) ) ) dev.off() pdf("Trans1QQNormX2.pdf", height=5, width=5) print(qqmath(~ x2, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="X2", cex=1.25), scales=list(cex=1.25) ) ) dev.off() pdf("Trans1QQNormX3.pdf", height=5, width=5) print(qqmath(~ x3, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="X3", cex=1.25), scales=list(cex=1.25) ) ) dev.off() pdf("Trans1QQNormX4.pdf", height=5, width=5) print(qqmath(~ x4, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Natural Log Transformation pdf("Trans1QQNormX4Log.pdf", height=5, width=5) print(qqmath(~ log(x4), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="Natural Log of X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Log Base 2 Transformation pdf("Trans1QQNormX4LogBase2.pdf", height=5, width=5) print(qqmath(~ log2(x4), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="Log Base 2 of X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Log Base 10 Transformation pdf("Trans1QQNormX4LogBase10.pdf", height=5, width=5) print(qqmath(~ log10(x4), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution", cex=1.25), ylab=list(label="Log Base 10 of X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Testing Positive Exponents using Quantile-Normal plots tlen <- length(x4) exponent <- c(rep(1/4, tlen), rep(1/3, tlen), rep(1/2, tlen), rep(1, tlen), rep(2, tlen), rep(3, tlen)) tempx2 <- c(x4^(1/4), x4^(1/3), x4^(1/2), x4^1, x4^2, x4^3) pdf("Trans1QQNormX4Trans1.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(3,2), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Testing Positive Exponents using Quantile-Normal plots tlen <- length(x4) exponent <- c(rep(1/5, tlen), rep(1/4, tlen), rep(1/3, tlen), rep(1/2, tlen)) tempx2 <- c(x4^(1/5), x4^(1/4), x4^(1/3), x4^(1/2)) pdf("Trans1QQNormX4Trans2.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,2), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Testing Positive Exponents on a distribution that isn't # a positive exponent. tlen <- length(x3) exponent <- c(rep(1/4, tlen), rep(1/3, tlen), rep(1/2, tlen), rep(1, tlen), rep(2, tlen), rep(3, tlen)) tempx2 <- c(x3^(1/4), x3^(1/3), x3^(1/2), x3^1, x3^2, x3^3) pdf("Trans1QQNormX3Trans0.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(3,2), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X3", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Testing Positive Exponents on distributions that aren't # a positive exponent. tlen <- length(x3) exponent <- c(rep(1/4, tlen), rep(1/3, tlen), rep(1/2, tlen)) tempx2 <- c(x3^(1/4), x3^(1/3), x3^(1/2)) pdf("Trans1QQNormX3Trans0b.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(3,1), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X3", cex=1.25), scales=list(cex=1.25) ) ) dev.off() tlen <- length(x3) exponent <- c(rep(1/4, tlen), rep(1/3, tlen), rep(1/2, tlen)) tempx2 <- c(x2^(1/4), x2^(1/3), x2^(1/2)) pdf("Trans1QQNormX2Trans0b.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(3,1), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X3", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # ---------------------------------------------- # Testing Negative Exponents using Quantile-Normal plots tlen <- length(x2) exponent <- c(rep(-1/3, tlen), rep(-1/2, tlen), rep(-1, tlen), rep(-2, tlen)) tempx2 <- c(x2^(-1/3), x2^(-1/2), x2^-1, x2^-2) pdf("Trans1QQNormX2TransNeg1.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,2), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X2", cex=1.25), scales=list(cex=1.25) ) ) dev.off() tlen <- length(x2) exponent <- c(rep(-1/3, tlen), rep(-1/2, tlen)) tempx2 <- c(x2^(-1/3), x2^(-1/2)) pdf("Trans1QQNormX2TransNeg1b.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,1), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X2", cex=1.25), scales=list(cex=1.25) ) ) dev.off() tlen <- length(x3) exponent <- c(rep(-1/3, tlen), rep(-1/2, tlen), rep(-1, tlen), rep(-2, tlen)) tempx2 <- c(x3^(-1/3), x3^(-1/2), x3^-1, x3^-2) pdf("Trans1QQNormX3TransNeg1.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,2), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X3", cex=1.25), scales=list(cex=1.25) ) ) dev.off() tlen <- length(x3) exponent <- c(rep(-1/3, tlen), rep(-1/2, tlen)) tempx2 <- c(x3^(-1/3), x3^(-1/2)) pdf("Trans1QQNormX3TransNeg1b.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,1), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X3", cex=1.25), scales=list(cex=1.25) ) ) dev.off() tlen <- length(x4) exponent <- c(rep(-1/3, tlen), rep(-1/2, tlen), rep(-1, tlen), rep(-2, tlen)) tempx2 <- c(x4^(-1/3), x4^(-1/2), x4^-1, x4^-2) pdf("Trans1QQNormX4TransNeg1.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,2), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() tlen <- length(x4) exponent <- c(rep(-1/3, tlen), rep(-1/2, tlen)) tempx2 <- c(x4^(-1/3), x4^(-1/2)) pdf("Trans1QQNormX4TransNeg1b.pdf", height=6.4,width=8) print(qqmath(~ tempx2 | as.factor(exponent), panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, layout=c(2,1), xlab = list(label="Unit Normal Quantiles", cex=1.25), ylab=list(label="X4", cex=1.25), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # A function for a power transformation for data centered at zero centeredPower <- function(dataVector, exponent) { tData <- dataVector tData[dataVector < 0] <- tData[dataVector < 0] * -1 tData <- tData^(exponent) tData[dataVector < 0] <- tData[dataVector < 0] * -1 return(tData) } # --------------------------------------------------------------------- # Create two variables x and x^2 and look at their histograms x <- rnorm(300, mean=0, sd=1) xSquared <- x^2 pdf("Trans1HistXXsqrd.pdf", height=5 ,width=9) par(mfrow=c(1,2)) hist(x, col="lightblue") hist(xSquared, col="lightblue") dev.off() pdf("Trans1QQXXsqrd.pdf", height=5 ,width=5) print(qqmath(~ xSquared, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="xSquared",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Try to transform back to normal distribution pdf("Trans1HistXXsqrdTrans1.pdf", height=5 ,width=9) par(mfrow=c(1,2)) hist(x, col="lightblue") hist(sqrt(xSquared), col="lightblue") dev.off() pdf("Trans1QQXXsqrdTrans1.pdf", height=5 ,width=5) print(qqmath(~ sqrt(xSquared), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="sqrt(xSquared)",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Use the centeredPower transformation with a power of 2 x <- rnorm(300, mean=0, sd=1) xSquared <- centeredPower(x, 2) pdf("Trans2HistXXsqrdB.pdf", height=5 ,width=9) par(mfrow=c(1,2)) hist(x, col="lightblue") hist(xSquared, col="lightblue") dev.off() pdf("Trans2QQXsqrdB.pdf", height=5 ,width=5) print(qqmath(~ xSquared, distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="xSquared",cex=1.75), scales=list(cex=1.25) ) ) dev.off() # --------------------------------------------------------------------- # Try to transform back to normal distribution pdf("Trans2HistXXsqrdBTrans1.pdf", height=5 ,width=9) par(mfrow=c(1,2)) hist(x, col="lightblue") hist(centeredPower(xSquared,1/2), col="lightblue") dev.off() pdf("Trans2QQXsqrdBTrans1.pdf", height=5 ,width=5) print(qqmath(~ centeredPower(xSquared,1/2), distribution=qnorm, panel = function(x, ...) { panel.qqmathline(x, ...) panel.qqmath(x, ...) }, aspect=1, xlab = list(label="Normal Distribution",cex=1.75), ylab=list(label="centeredPower(xSquared,1/2)",cex=1.75), scales=list(cex=1.25) ) ) dev.off()