#-*- R -*- # Chapter 11 Multivariate Analysis library(mva) library(MASS) # needed for biplot.princomp postscript(file="ch11.ps", width=8, height=6, pointsize=9) options(width=65, digits=5) data(swiss) swiss.x <- as.matrix(swiss[,-1]) # 11.1 Graphical methods data(iris3) ir <- rbind(iris3[,,1], iris3[,,2], iris3[,,3]) ir.species <- factor(c(rep("s",50), rep("c",50), rep("v",50))) #if(interactive()) brush(ir) ir.pca <- princomp(log(ir), cor=T) ir.pca summary(ir.pca) plot(ir.pca) loadings(ir.pca) ir.pc <- predict(ir.pca) eqscplot(ir.pc[,1:2], type="n", xlab="first principal component", ylab = "second principal component") text(ir.pc[,1:2], labels = as.character(ir.species)) ir.scal <- cmdscale(dist(ir), k = 2, eig = T) ir.scal$points[, 2] <- -ir.scal$points[, 2] eqscplot(ir.scal$points, type="n") text(ir.scal$points, labels = as.character(ir.species), cex = 0.8) distp <- dist(ir) dist2 <- dist(ir.scal$points) sum((distp - dist2)^2)/sum(distp^2) ir.sam <- sammon(dist(ir[-143,])) eqscplot(ir.sam$points, type="n") text(ir.sam$points, labels = as.character(ir.species[-143]), cex = 0.8) ir.iso <- isoMDS(dist(ir[-143,])) eqscplot(ir.iso$points, type="n") text(ir.iso$points, labels = as.character(ir.species[-143]), cex = 0.8) data(fgl) fgl.iso <- isoMDS(dist(as.matrix(fgl[-40, -10]))) eqscplot(fgl.iso$points, type="n", xlab="", ylab="") # either for(i in seq(along=levels(fgl$type))) { set <- fgl$type[-40] == levels(fgl$type)[i] points(fgl.iso$points[set,], pch=18, cex=0.6, col=2+i)} #key(text=list(levels(fgl$type), col=3:8)) # or #text(fgl.iso$points, labels = c("F", "N", "V", "C", "T", "H") # [fgl$type[-40]], cex=0.6) data(state) state <- state.x77[,2:7] state <- state.x77[,2:7]; row.names(state) <- state.abb biplot(princomp(state, cor=T), pc.biplot=T, cex = 0.7, ex=0.8) # 11.2 Cluster analysis h <- hclust(dist(swiss.x), method="single") plot(h) # or plclust with library(cluster) cutree(h, 3) #plclust( clorder(h, cutree(h, 3) )) h <- hclust(dist(swiss.x), method="average") initial <- tapply(swiss.x, list(rep(cutree(h, 3), ncol(swiss.x)), col(swiss.x)), mean) dimnames(initial) <- list(NULL, dimnames(swiss.x)[[2]]) km <- kmeans(swiss.x, initial) swiss.pca <- princomp(swiss.x) swiss.pca swiss.px <- predict(swiss.pca) dimnames(km$centers)[[2]] <- dimnames(swiss.x)[[2]] swiss.centers <- predict(swiss.pca, km$centers) eqscplot(swiss.px[, 1:2], type="n", xlab="first principal component", ylab="second principal component") text(swiss.px[,1:2], labels = km$cluster) points(swiss.centers[,1:2], pch=3, cex=3) #identify(swiss.px[, 1:2], cex=0.5) if(F){ h <- mclust(swiss.x, method = "S*")$tree plclust( clorder(h, cutree(h, 3) )) h <- mclust(swiss.x, method = "S*", noise=T) hclass <- mclass(h, 3) hclass$class mreloc(hclass, swiss.x, method = "S*", noise=T) } library(cluster) swiss.pam <- pam(swiss.px, 3) summary(swiss.pam) eqscplot(swiss.px[, 1:2], type="n", xlab="first principal component", ylab="second principal component") text(swiss.px[,1:2], labels = swiss.pam$clustering) points(swiss.pam$medoid[,1:2], pch=3, cex=3) fanny(swiss.px, 3) par(mfrow=c(2,1)) pltree(agnes(swiss.x, method="single")) pltree(diana(swiss.x)) par(mfrow=c(1,1)) # 11.3 Correspondence analysis data(caith) corresp(caith) dimnames(caith)[[2]] <- c("F", "R", "M", "D", "B") par(mfcol=c(1,3)) plot(corresp(caith, nf=2)); title("symmetric") plot(corresp(caith, nf=2), type="rows"); title("rows") plot(corresp(caith, nf=2), type="col"); title("columns") par(mfrow=c(1,1)) # 11.4 Discriminant analysis ir.lda <- lda(log(ir), ir.species) ir.lda ir.ld <- predict(ir.lda, dimen=2)$x eqscplot(ir.ld, type="n", xlab="first linear discriminant", ylab = "second linear discriminant") text(ir.ld, labels = as.character(ir.species[-143]), cex = 0.8) plot(ir.lda, dimen=1) plot(ir.lda, type="density", dimen=1) data(fgl) par(mfrow=c(1,2), pty="s") fgl.lda <- lda(type ~ ., fgl) fgl.ld <- predict(fgl.lda, dimen=2)$x eqscplot(fgl.ld, type="n", xlab="LD2", ylab="LD1") # either for(i in seq(along=levels(fgl$type))) { set <- fgl$type[-40] == levels(fgl$type)[i] points(fgl.ld[set,], pch=18, cex=0.6, col=2+i)} #key(text=list(levels(fgl$type), col=3:8)) # or #text(fgl.ld, labels = c("F", "N", "V", "C", "T", "H")[fgl$type[-40]], cex=0.6) fgl.rlda <- lda(type ~ ., fgl, method="t") fgl.rld <- predict(fgl.rlda, dimen=2)$x eqscplot(fgl.rld, type="n", xlab="LD2", ylab="LD1") # either for(i in seq(along=levels(fgl$type))) { set <- fgl$type[-40] == levels(fgl$type)[i] points(fgl.rld[set,], pch=18, cex=0.6, col=2+i)} #key(text=list(levels(fgl$type), col=3:8)) # or #text(fgl.rld, labels = c("F", "N", "V", "C", "T", "H")[fgl$type[-40]], cex=0.6) par(mfrow=c(1,1), pty="m") # 11.5 Classification theory #increase len if you have enough memory. predplot <- function(object, main="", len=50, ...) { plot(Cushings[,1], Cushings[,2], log="xy", type="n", xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main=main) for(il in 1:4) { set <- Cushings$Type==levels(Cushings$Type)[il] text(Cushings[set, 1], Cushings[set, 2], labels=as.character(Cushings$Type[set]), col = 2 + il) } xp <- seq(0.6, 4.0, length=len) yp <- seq(-3.25, 2.45, length=len) cushT <- expand.grid(Tetrahydrocortisone=xp, Pregnanetriol=yp) Z <- predict(object, cushT, ...); zp <- as.numeric(Z$class) zp <- Z$post[,3] - pmax(Z$post[,2], Z$post[,1]) contour(exp(xp), exp(yp), matrix(zp, len), add=T, levels=0, labex=0) zp <- Z$post[,1] - pmax(Z$post[,2], Z$post[,3]) contour(exp(xp), exp(yp), matrix(zp, len), add=T, levels=0, labex=0) invisible() } data(Cushings) cush <- log(as.matrix(Cushings[, -3])) tp <- factor(Cushings$Type[1:21]) cush.lda <- lda(cush[1:21,], tp); predplot(cush.lda, "LDA") cush.qda <- qda(cush[1:21,], tp); predplot(cush.qda, "QDA") predplot(cush.qda, "QDA (predictive)", method = "predictive") predplot(cush.qda, "QDA (debiased)", method = "debiased") library(nnet) Cf <- data.frame(tp = tp, Tetrahydrocortisone = log(Cushings[1:21,1]), Pregnanetriol = log(Cushings[1:21,2]) ) cush.multinom <- multinom(tp ~ Tetrahydrocortisone + Pregnanetriol, Cf, maxit=250) xp <- seq(0.6, 4.0, length=100); np <- length(xp) yp <- seq(-3.25, 2.45, length=100) cushT <- expand.grid(Tetrahydrocortisone=xp, Pregnanetriol=yp) Z <- predict(cush.multinom, cushT, type="probs") plot(Cushings[,1], Cushings[,2], log="xy", type="n", xlab="Tetrahydrocortisone", ylab = "Pregnanetriol") for(il in 1:4) { set <- Cushings$Type==levels(Cushings$Type)[il] text(Cushings[set, 1], Cushings[set, 2], labels=as.character(Cushings$Type[set]), col = 2 + il) } zp <- Z[,3] - pmax(Z[,2], Z[,1]) contour(exp(xp), exp(yp), matrix(zp, np), add=T, levels=0, labex=0) zp <- Z[,1] - pmax(Z[,2], Z[,3]) contour(exp(xp), exp(yp), matrix(zp, np), add=T, levels=0, labex=0) library(tree) cush.tr <- tree(tp ~ Tetrahydrocortisone + Pregnanetriol, Cf) plot(cush[,1], cush[,2], type="n", xlab="Tetrahydrocortisone", ylab = "Pregnanetriol") for(il in 1:4) { set <- Cushings$Type==levels(Cushings$Type)[il] text(cush[set, 1], cush[set, 2], labels= as.character(Cushings$Type[set]), col = 2 + il) } par(cex=1.5); partition.tree(cush.tr, add=T); par(cex=1) # 11.6 Other classification methods # neural networks library(nnet) pltnn <- function(main, ...) { plot(Cushings[,1], Cushings[,2], log="xy", type="n", xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main=main, ...) for(il in 1:4) { set <- Cushings$Type==levels(Cushings$Type)[il] text(Cushings[set, 1], Cushings[set, 2], as.character(Cushings$Type[set]), col = 2 + il) } } plt.bndry <- function(size=0, decay=0, ...) { cush.nn <- nnet(cush, tpi, skip=T, softmax=T, size=size, decay=decay, maxit=1000) invisible(b1(predict(cush.nn, cushT), ...)) } b1 <- function(Z, ...) { zp <- Z[,3] - pmax(Z[,2], Z[,1]) contour(exp(xp), exp(yp), matrix(zp, np), add=T, levels=0, labex=0, ...) zp <- Z[,1] - pmax(Z[,3], Z[,2]) contour(exp(xp), exp(yp), matrix(zp, np), add=T, levels=0, labex=0, ...) } cush <- cush[1:21,]; tpi <- class.ind(tp) # functions pltnn and plt.bndry given in the scripts par(mfrow=c(2,2)) pltnn("Size = 2") set.seed(1); plt.bndry(size=2, col=2) set.seed(3); plt.bndry(size=2, col=3); plt.bndry(size=2, col=4) pltnn("Size = 2, lambda = 0.001") set.seed(1); plt.bndry(size=2, decay=0.001, col=2) set.seed(2); plt.bndry(size=0, decay=0.001, col=4) pltnn("Size = 2, lambda = 0.01") set.seed(1); plt.bndry(size=2, decay=0.01, col=2) set.seed(2); plt.bndry(size=2, decay=0.01, col=4) pltnn("Size = 5, 20 lambda = 0.01") set.seed(2); plt.bndry(size=5, decay=0.01, col=1) set.seed(2); plt.bndry(size=20, decay=0.01, col=2) # functions pltnn and b1 are in the scripts pltnn("Many local maxima") Z <- matrix(0, nrow(cushT), ncol(tpi)) for(iter in 1:20) { set.seed(iter) cush.nn <- nnet(cush, tpi, skip=T, softmax=T, size=3, decay=0.01, maxit=1000, trace=F) Z <- Z + predict(cush.nn, cushT) # In 5.x replace $ by @ in next line. cat("final value", format(round(cush.nn$value,3)), "\n") b1(predict(cush.nn, cushT), col=2, lwd=0.5) } pltnn("Averaged") b1(Z, lwd=3) # Non-parametric rules library(class) par(pty="s", mfrow=c(1,2)) plot(Cushings[,1], Cushings[,2], log="xy", type="n", xlab = "Tetrahydrocortisone", ylab = "Pregnanetriol", main = "1-NN") for(il in 1:4) { set <- Cushings$Type==levels(Cushings$Type)[il] text(Cushings[set, 1], Cushings[set, 2], as.character(Cushings$Type[set]), col = 2 + il) } Z <- knn(scale(cush, F, c(3.4, 5.7)), scale(cushT, F, c(3.4, 5.7)), tp) contour(exp(xp), exp(yp), matrix(as.numeric(Z=="a"), np), add=T, levels=0.5, labex=0) contour(exp(xp), exp(yp), matrix(as.numeric(Z=="c"), np), add=T, levels=0.5, labex=0) plot(Cushings[,1], Cushings[,2], log="xy", type="n", xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main = "3-NN") for(il in 1:4) { set <- Cushings$Type==levels(Cushings$Type)[il] text(Cushings[set, 1], Cushings[set, 2], as.character(Cushings$Type[set]), col = 2 + il) } Z <- knn(scale(cush, F, c(3.4, 5.7)), scale(cushT, F, c(3.4, 5.7)), tp, k=3) contour(exp(xp), exp(yp), matrix(as.numeric(Z=="a"), np), add=T, levels=0.5, labex=0) contour(exp(xp), exp(yp), matrix(as.numeric(Z=="c"), np), add=T, levels=0.5, labex=0) par(pty="m", mfrow=c(1,1)) # 11.7 Two extended examples # Leptograpsus variegatis crabs data(crabs) lcrabs <- log(crabs[,4:8]) crabs.grp <- factor(c("B", "b", "O", "o")[rep(1:4, rep(50,4))]) lcrabs.pca <- princomp(lcrabs) lcrabs.pc <- predict(lcrabs.pca) dimnames(lcrabs.pc) <- list(NULL, paste("PC", 1:5, sep="")) lcrabs.pca loadings(lcrabs.pca) if(F) { library(xgobi) xgobi(lcrabs, colors=c("SkyBlue", "SlateBlue", "Orange", "Red")[rep(1:4, rep(50, 4))]) xgobi(lcrabs, glyphs=12 + 5*rep(0:3, rep(50, 4))) } cr.scale <- 0.5 * log(crabs$CL * crabs$CW) slcrabs <- lcrabs - cr.scale cr.means <- matrix(0, 2, 5) cr.means[1,] <- apply(slcrabs[crabs$sex=="F",], 2, mean) cr.means[2,] <- apply(slcrabs[crabs$sex=="M",], 2, mean) dslcrabs <- slcrabs - cr.means[as.numeric(crabs$sex),] lcrabs.sam <- sammon(dist(dslcrabs)) eqscplot(lcrabs.sam$points, type="n", xlab="", ylab="") text(lcrabs.sam$points, labels = as.character(crabs.grp)) if(F) { crabs.h <- cutree(hclust(dist(dslcrabs)),2) table(crabs$sp, crabs.h) cr.means[1,] <- apply(dslcrabs[crabs.h==1,], 2, mean) cr.means[2,] <- apply(dslcrabs[crabs.h==2,], 2, mean) crabs.km <- kmeans(dslcrabs, cr.means) table(crabs$sp, crabs.km$cluster) } dcrabs.lda <- lda(crabs$sex ~ FL + RW + CL + CW, lcrabs) dcrabs.lda dcrabs.pred <- predict(dcrabs.lda) table(crabs$sex, dcrabs.pred$class) dcrabs.lda4 <- lda(crabs.grp ~ FL + RW + CL + CW, lcrabs) dcrabs.lda4 dcrabs.pr4 <- predict(dcrabs.lda4, dimen=2) dcrabs.pr2 <- dcrabs.pr4$post[, c("B","O")] %*% c(1,1) table(crabs$sex, dcrabs.pr2 > 0.5) cr.t <- dcrabs.pr4$x[,1:2] eqscplot(cr.t, type="n", xlab="First LD", ylab="Second LD") text(cr.t, labels = as.character(crabs.grp)) perp <- function(x, y) { m <- (x+y)/2 s <- - (x[1] - y[1])/(x[2] - y[2]) abline(c(m[2] - s*m[1], s)) invisible() } # For 5.x replace $means by @means cr.m <- lda(cr.t, crabs$sex)$means points(cr.m, pch=3, mkh=0.3) perp(cr.m[1,], cr.m[2,]) cr.lda <- lda(cr.t, crabs.grp) x <- seq(-6, 6, 0.25) y <- seq(-2, 2, 0.25) Xcon <- matrix(c(rep(x,length(y)), rep(y, rep(length(x),length(y)))),,2) cr.pr <- predict(cr.lda, Xcon)$post[, c("B","O")] %*% c(1,1) contour(x, y, matrix(cr.pr, length(x), length(y)), levels=0.5, labex=0, add=T, lty=3) for(i in c("O", "o", "B", "b")) print(var(lcrabs[crabs.grp==i, ])) # Forensic glass data(fgl) set.seed(123); rand <- sample (10, 214, replace=T) con <- function(x,y) { tab <- table(x,y) print(tab) diag(tab) <- 0 cat("error rate = ", round(100*sum(tab)/length(x),2),"%\n") invisible() } CVtest <- function(fitfn, predfn, ...) { res <- fgl$type for (i in sort(unique(rand))) { cat("fold ",i,"\n", sep="") learn <- fitfn(rand != i, ...) res[rand == i] <- predfn(learn, rand==i) } res } res.multinom <- CVtest( function(x, ...) multinom(type ~ ., fgl[x,], ...), function(obj, x) predict(obj, fgl[x, ],type="class"), maxit=1000, trace=F ) con(fgl$type, res.multinom) res.lda <- CVtest( function(x, ...) lda(type ~ ., fgl[x, ], ...), function(obj, x) predict(obj, fgl[x, ])$class ) con(fgl$type, res.lda) library(class) fgl0 <- fgl[ ,-10] # drop type { res <- fgl$type for (i in sort(unique(rand))) { cat("fold ",i,"\n", sep="") sub <- rand == i res[sub] <- knn(fgl0[!sub, ], fgl0[sub,], fgl$type[!sub], k=1) } res } -> res.knn1 con(fgl$type, res.knn1) res.lb <- knn(fgl0, fgl0, fgl$type, k=3, prob=T, use.all=F) table(attr(res.lb, "prob")) library(rpart) ## rpart 3.0 or later res.rpart <- CVtest( function(x, ...) { tr <- rpart(type ~ ., fgl[x,], ...) cp <- tr$cptable r <- cp[, 4] + cp[, 5] rmin <- min(seq(along=r)[cp[, 4] < min(r)]) cp0 <- cp[rmin, 1] cat("size chosen was", cp[rmin, 2] + 1, "\n") prune(tr, cp=1.01*cp0) }, function(obj, x) predict(obj, fgl[x, ], type="class"), # levels(fgl$type)[apply(predict(obj, fgl[x, ]), 1, which.is.max)], cp = 0.001 ) con(fgl$type, res.rpart) fgl1 <- lapply(fgl[, 1:9], function(x) {r <- range(x); (x-r[1])/diff(r)}) fgl1 <- data.frame(fgl1, type=fgl$type) # stepAIC does not currently work with new-style classes like multinom res.mult3 <- CVtest( function(xsamp, ...) { assign("xsamp", xsamp, envir=.GlobalEnv) obj <- multinom(type ~ ., fgl1[xsamp,], trace=F, ...) stepAIC(obj) }, function(obj, x) predict(obj, fgl1[x, ],type="class"), maxit=1000, decay=1e-3) con(fgl$type, res.mult3) CVnn2 <- function(formula, data, size = rep(6,2), lambda = c(0.001, 0.01), nreps = 1, nifold = 5, verbose = 99, ...) { CVnn1 <- function(formula, data, nreps=1, ri, verbose, ...) { truth <- data[,deparse(formula[[2]])] res <- matrix(0, nrow(data), length(levels(truth))) if(verbose > 20) cat(" inner fold") for (i in sort(unique(ri))) { if(verbose > 20) cat(" ", i, sep="") for(rep in 1:nreps) { learn <- nnet(formula, data[ri !=i,], trace=F, ...) res[ri == i,] <- res[ri == i,] + predict(learn, data[ri == i,]) } } if(verbose > 20) cat("\n") sum(as.numeric(truth) != max.col(res/nreps)) } truth <- data[,deparse(formula[[2]])] res <- matrix(0, nrow(data), length(levels(truth))) choice <- numeric(length(lambda)) for (i in sort(unique(rand))) { if(verbose > 0) cat("fold ", i,"\n", sep="") ri <- sample(nifold, sum(rand!=i), replace=T) for(j in seq(along=lambda)) { if(verbose > 10) cat(" size =", size[j], "decay =", lambda[j], "\n") choice[j] <- CVnn1(formula, data[rand != i,], nreps=nreps, ri=ri, size=size[j], decay=lambda[j], verbose=verbose, ...) } decay <- lambda[which.is.max(-choice)] csize <- size[which.is.max(-choice)] if(verbose > 5) cat(" #errors:", choice, " ") if(verbose > 1) cat("chosen size = ", csize, " decay = ", decay, "\n", sep="") for(rep in 1:nreps) { learn <- nnet(formula, data[rand != i,], trace=F, size=csize, decay=decay, ...) res[rand == i,] <- res[rand == i,] + predict(learn, data[rand == i,]) } } factor(levels(truth)[max.col(res/nreps)], levels = levels(truth)) } if(F) { # only run this if you have time to wait res.nn2 <- CVnn2(type ~ ., fgl1, skip=T, maxit=500, nreps=10) con(fgl$type, res.nn2) } cd0 <- lvqinit(fgl0, fgl$type, prior=rep(1,6)/6,k=3) cd1 <- olvq1(fgl0, fgl$type, cd0) con(fgl$type, lvqtest(cd1, fgl0)) CV.lvq <- function() { res <- fgl$type for(i in sort(unique(rand))) { cat("doing fold",i,"\n") cd0 <- lvqinit(fgl0[rand != i,], fgl$type[rand != i], prior=rep(1,6)/6, k=3) cd1 <- olvq1(fgl0[rand != i,], fgl$type[rand != i], cd0) cd1 <- lvq3(fgl0[rand != i,], fgl$type[rand != i], cd1, niter=10000) res[rand == i] <- lvqtest(cd1, fgl0[rand == i,]) } res } con(fgl$type, CV.lvq()) # Try Mahalanobis distance library(mva) fgl0 <- scale(princomp(fgl[,-10])$scores) con(fgl$type, CV.lvq()) # 11.8 Calibration plots CVprobs <- function(fitfn, predfn, ...) { res <- matrix(, 214, 6) for (i in sort(unique(rand))) { cat("fold ",i,"\n", sep="") learn <- fitfn(rand != i, ...) res[rand == i,] <- predfn(learn, rand==i) } res } probs.multinom <- CVprobs( function(x, ...) multinom(type ~ ., fgl[x,], ...), function(obj, x) predict(obj, fgl[x, ],type="probs"), maxit=1000, trace=F ) library(modreg) probs.yes <- as.vector(class.ind(fgl$type)) probs <- as.vector(probs.multinom) par(pty="s") plot(c(0,1), c(0,1), type="n", xlab="predicted probability", ylab="", xaxs="i", yaxs="i", las=1) rug(probs[probs.yes==0], 0.02, side=1, lwd=0.5) rug(probs[probs.yes==1], 0.02, side=3, lwd=0.5) abline(0,1) newp <- seq(0, 1, length=100) lines(newp, predict(loess(probs.yes ~ probs, span=1), newp)) # End of ch11