"diag.plots" <- function(reg.object) { # draws several diagnostic plots using the lm object # reg.object. Plots are # resididuals versus fitted values, with lowess line # normal plot of residuals # resididuals versus fitted values, with case number # leverage versus residuals plot par(mfrow=c(2,2)) res<-residuals(reg.object) pred<-fitted(reg.object) plot(pred,res,ylab="Residuals",xlab="Fitted values") lines(lowess(pred,res)) abline(h=0,lty=2) title("Plot of residuals versus fitted values") qqnorm(residuals(reg.object),ylab="Residuals") abline(0,sqrt(var(res)),lty=2) plot(pred, res, type = "n",ylab="Residuals", xlab="Fitted values",main="Labelled plot") text(pred, res, 1:length(pred)) lrplot(reg.object) layout(1, 1) invisible() } "influence.measures" <- function (lm.obj) { # calculates standard influence measures using the lm object # lm.obj is.influential <- function(infmat, n) { k <- ncol(infmat) - 4 if (n <= k) stop("Too few cases, n < k") absmat <- abs(infmat) result <- cbind(absmat[, 1:k] > 1, absmat[, k + 1] > 3 * sqrt(k/(n - k)), abs(1 - infmat[, k + 2]) > (3 * k)/(n - k), pf(infmat[, k + 3], k, n - k) > 0.5, infmat[, k + 4] > (3 * k)/n) dimnames(result) <- dimnames(infmat) result } infl <- lm.influence(lm.obj) p <- lm.obj$rank e <- weighted.residuals(lm.obj) s <- sqrt(sum(e^2, na.rm = TRUE)/df.residual(lm.obj)) xxi <- chol2inv(lm.obj$qr$qr, lm.obj$qr$rank) si <- infl$sigma h <- infl$hat dfbetas <- infl$coefficients/outer(infl$sigma, sqrt(diag(xxi))) vn <- variable.names(lm.obj) vn[vn == "(Intercept)"] <- "1_" colnames(dfbetas) <- paste("dfb", abbreviate(vn), sep = ".") dffits <- e * sqrt(h)/(si * (1 - h)) cov.ratio <- (si/s)^(2 * p)/(1 - h) cooks.d <- ((e/(s * (1 - h)))^2 * h)/p dn <- dimnames(lm.obj$qr$qr) infmat <- cbind(dfbetas, dffit = dffits, cov.r = cov.ratio, cook.d = cooks.d, hat = h) is.inf <- is.influential(infmat, sum(h > 0)) ans <- list(infmat = infmat, is.inf = is.inf, call = lm.obj$call) class(ans) <- "infl" ans } "influence.plots"<- function(lm.obj){ # draws index plots of influence measures op <- par(ask = TRUE) on.exit(par(op)) infl <- lm.influence(lm.obj) p <- lm.obj$rank e <- weighted.residuals(lm.obj) s <- sqrt(sum(e^2, na.rm = TRUE)/df.residual(lm.obj)) xxi <- chol2inv(lm.obj$qr$qr, lm.obj$qr$rank) si <- infl$sigma h <- infl$hat n<-length(h) dfbetas <- abs(infl$coefficients/outer(infl$sigma, sqrt(diag(xxi)))) vn <- variable.names(lm.obj) vn[vn == "(Intercept)"] <- "1_" colnames(dfbetas) <- paste("dfb", abbreviate(vn), sep = ".") dffits <- abs(e * sqrt(h)/(si * (1 - h))) cov.ratio <- (si/s)^(2 * p)/(1 - h) CR1<-abs(cov.ratio-1) cooks.d <- ((e/(s * (1 - h)))^2 * h)/p no.of.plots<-p+4 maintext<-c(colnames(dfbetas),"DFFITS"," ABS(COV RATIO-1)", "Cook's D", "Hats") show.label<-matrix(FALSE,n,no.of.plots) for(j in 1:p) show.label[,j]<-dfbetas[,j]>(2/sqrt(n)) show.label[,p+1]<-dffits>(3*sqrt(p/(n-p))) # DFFITS show.label[,p+2]<-CR1>3*p/n # COV Ratio show.label[,p+3]<-cooks.d>qf(0.5,p,n-p) # Cooks D show.label[,p+4]<-h>3*p/n # Hats dfbetas<-cbind(dfbetas,dffits,CR1,cooks.d,h) for(j in 1:(p+4)){ show.i<-(1:n)[show.label[,j]] plot(dfbetas[,j], ylim=c(0,max(dfbetas[,j])),type = "h", main=maintext[j], xlab = "Obs. number", ylab = maintext[j]) if(length(show.i)>0) text(show.i, dfbetas[show.i,j] + 0.4 * cex.id * strheight(" "), show.i) } } "funnel" <- function(reg.object) { # diagnostic plots for detecting non-constant variance # plots log sds vs log means # and then squared residuals versus fitted values # returns a smoothed version of the plot as an # estimate of the variance function library(modreg) layout(1,2) pred<-fitted(reg.object) res<-residuals(reg.object) cut.points<-quantile(pred,c(0.,.2,.4,.6,.8,1.0)) + c(-0.01,0,0,0,0,1.01) group<-cut(pred,cut.points) log.means<-log(tapply(pred+res,group,mean)) log.sds<-log(sqrt(tapply(pred+res,group,var))) plot(log.means,log.sds,xlab="Log means",ylab="Log std. errors") res.sq<-res^2 plot(pred,res.sq,ylab="Squared residuals",xlab="Fitted values") low.stuff<-loess(res.sq~pred,span=1) lines(sort(pred),fitted(low.stuff)[order(pred)],lty=2) cat("Slope:",lm(log.sds~log.means)$coef[2],"\n") layout(1,1) invisible(fitted(low.stuff)) } "boxcoxplot" <- function (X,y,p = seq(-2, 2, length = 20), ...) { # Draws a box-Cox plot for transforming to normality # X matrix of explanatory variables # y response variable l <- length(p) boxcox <- seq(l) n <- length(y) sumlog <- sum(log(y)) for (i in seq(l)) { y.p<- (y^p[i] -1)/p[i] trans.res<-residuals(lm(y.p~X)) ResSS.p<-sum(trans.res^2) boxcox[i] <- n * log(ResSS.p)/2 - (p[i] - 1) * sumlog } plot(p, boxcox, type = "l", ylab = "Profile likelihood", main = "Box-Cox plot", ...) } "partial.plots" <- function(lm.obj,intercept=T) { # Draws added variable plots (also known as partial regression plots) X <- model.matrix(lm.obj$terms, model.frame(lm.obj)) y<-X%*%coefficients(lm.obj)+residuals(lm.obj) X<-X[,-1] k<-dim(X)[2] for(i in (1:k)) { z1 <- lsfit(X[, - i], X[, i], intercept = intercept) z2 <- lsfit(X[, - i], y, intercept = intercept) plotname <- dimnames(X)[[2]][i] plot(z1$residuals, z2$residuals, xlab = plotname, ylab = "Residuals", main = paste("Partial plot of", plotname)) abline(h=0,lty=1) lines(lowess(residuals(z1),residuals(z2)),lty=2) } invisible() } "added.variable.plots" <- function(lm.obj,intercept=T) { # Draws added variable plots (also known as partial regression plots) X <- model.matrix(lm.obj$terms, model.frame(lm.obj)) y<-X%*%coefficients(lm.obj)+residuals(lm.obj) X<-X[,-1] k<-dim(X)[2] for(i in (1:k)) { z1 <- lsfit(X[, - i], X[, i], intercept = intercept) z2 <- lsfit(X[, - i], y, intercept = intercept) plotname <- dimnames(X)[[2]][i] plot(z1$residuals, z2$residuals, xlab = plotname, ylab = "Residuals", main = paste("Partial plot of", plotname)) abline(h=0,lty=1) lines(lowess(residuals(z1),residuals(z2)),lty=2) } invisible() } "all.poss.regs" <- function(X,y,nbest=3) { # calculates model goodness statistics for All Possible Regressions library(leaps) selection.stuff<-leaps(data.matrix(X),as.numeric(y), nbest=nbest,method="r2",strictly.compatible=F) n<-length(y) Rsq<-selection.stuff$r2 rssp<-(1-Rsq)*sum((y-mean(y))^2) p<-selection.stuff$size-1 sigma2<-rssp/(n-p-1) adjRsq<- 1-(1-Rsq)*(n-1)/(n-p-1) Cp<-rssp/sigma2[length(p)] + 2*(p+1)-n round(cbind(rssp,sigma2,adjRsq,Cp,selection.stuff$which),3) } lrplot<-function(lm.object) { # draws a leverage-residual plot hats<-lm.influence(lm.object)$hat plot(residuals(lm.object)^2, hats, xlab= "Squared residuals", ylab = "influence",type="n") text(residuals(lm.object)^2, hats, 1:length(hats)) abline(h=3*length(coef(lm.object))/length(residuals(lm.object)),lty=2) title("Leverage-residual plot") invisible() } ######################################## layout<-function(x,y) { par(mfrow=c(x,y)) invisible() } ####################################### # partial residual plots partial.res.plot<-function(lm.obj){ X<-model.matrix(lm.obj) p<-dim(X)[2] if(p==1)return("No variables in regression, constant term only") for(j in 2:p){ z<-model.matrix(lm.obj)[,j] res.z<-z*coef(lm.obj)[j] + residuals(lm.obj) plotname<-dimnames(X)[[2]][j] plot(z,res.z,xlab=dimnames(X)[[2]][j], ylab="partial residual", main=paste("partial plot for variable",plotname) ) lines(lowess(z,res.z),lwd=2,col="red") readline("Press any key to continue\n") } invisible() } ####################################### par.residual.plots<-function(glm.object) { X<-model.matrix(glm.object$terms, model.frame(glm.object)) X<-X[,-1] k<-dim(X)[2] p.res<-residuals(glm.object,type="pearson") weights<-if (is.null(glm.object$prior.weights))1 else(glm.object$prior.weights) partial.res<-p.res/sqrt((weights*glm.object$fitted.values*(1- glm.object$fitted.values))) for(i in (1:k)){ plotname<-dimnames(X)[[2]][i] plot(X[, i],partial.res+glm.object$coefficient[i+1]*X[, i], ylab="Partial residuals",xlab=plotname, main=paste("Partial residual plot of", plotname)) lines(lowess(X[, i],partial.res+glm.object$coefficient[i+1]*X[, i]),lty=2) if((i%%4)==0) pause() } invisible() } ######################################## glm.diag.plots<-function(glm.model) { layout(2,2) p.res<-residuals(glm.model,type="pearson") d.res<-residuals(glm.model,type="deviance") plot(d.res,type="n",xlab="Observation number", ylab="Deviance Residuals", main="Index plot of deviance residuals") text(d.res) hats<-lm.influence(glm.model)$hat plot(hats,type="n",ylim=c(0,max(hats)), xlab="Observation Number" ,ylab="Leverage", main="Leverage plot") text(hats) p<-glm.model$rank cooks<-(p.res^2*hats)/(p*(1-hats)^2) plot(cooks,type="n",ylim=c(0,max(cooks)), xlab="Observation number",ylab="Cook's Distance", main="Cook's Distance Plot") text(cooks) dev.changes<-d.res^2+p.res^2*(hats/(1-hats)) plot(dev.changes,type="n",ylim=c(0,max(dev.changes)), xlab="Observation number",ylab="Deviance changes", main="Deviance Changes Plot") text(dev.changes) invisible() } ###################################### # WB.test<-function(lm.obj,n.rep=1000){ # Calculates the Weisberg-Bingham test # and its approximate p-value sorted.res<-sort(residuals(lm.obj)) n<-length(sorted.res) normal.quantiles<-qnorm(((1:n)-0.5)/n) WB<-cor(normal.quantiles,sorted.res) WB.vec<-numeric(n.rep) for(i in 1:n.rep)WB.vec[i]<-cor(normal.quantiles,sort(rnorm(n))) cat("WB test statistic = ",round(WB,3),"\n") cat("p = ",round(mean(WB.vec