# R code to accompany # "Row-Column Interaction Models, with an R implementation" by # Thomas W. Yee and Alfian F. Hadi, # Computational Statistics, 2014 (in press). # Most of the examples here come from Section 5. # Last modified: 20140521. # Note: future changes to \pkg{VGAM} and \pkg{VGAMdata} are possible. # The latest version of these R packages are available at # http://www.stat.auckland.ac.nz/~yee # ====================================================================== # Set things up # Version 0.9-4 of both packages are required. library("VGAM") # Modelling functions are here. library("VGAMdata") # Some data sets are here. # Sweave options ps.options(pointsize = 12) options(width = 72, digits = 4) options(SweaveHooks = list(fig = function() par(las = 1))) options(prompt = "R> ", continue = "+ ") options(continue = " ") # ====================================================================== # GRC model fitted to alcoholic offences (alcoff) # See Section 5.1 of Yee and Hadi (2014). alcoff # A 24 x 7 response matrix of counts # Independence Poisson model grc0.alcoff <- rcim(moffset(alcoff, "6", postfix = "*"), rprefix = "Hour.24.", cprefix = "Day.") # Plot the main effects par(mfrow = c(1, 2), mar = c(4, 4.5, 0.9, 0.9) + 0.1) plot(grc0.alcoff, rcol = "blue", ccol = "orange", rfirst = 14, cfirst = 1, rtype = "h", ctype = "h", lwd = 2, ylim = c(-1.5, 1.5), cylab = "Effective daily effects", rylab = "Hourly effects", rxlab = "Hour", cxlab = "Effective day") # The estimated row and column effects may be extracted as follows. round(coef(grc0.alcoff), dig = 2) plot.grc0.alcoff <- plot(grc0.alcoff, which = NULL) round(plot.grc0.alcoff@post$raw.col.effects, dig = 2) round(plot.grc0.alcoff@post$col.effects, dig = 2) class(grc0.alcoff) summary(grc0.alcoff) # ====================================================================== # Median polish fit to crash injuries, \code{crashi}. # See Section 5.2 of Yee and Hadi (2014). # Note that the asymmetric Laplace distribution is not naturally # well-suited to estimation by scoring, hence this example is not # fully reliable. crashi # crash injuries; a 24 x 7 matrix of counts # Fit the median polish model mp0.crashi <- rcim(crashi, alaplace2(tau = 0.5, intparloc = TRUE)) # Plot the main effects par(mfrow = c(1, 2), mar = c(4.4, 4.3, 0.9, 1.9) + 0.1) plot.mp0.crashi <- plot(mp0.crashi, lwd = 2, hlwd = 2, hcol = "purple", rcol = "blue", ccol = "darkorange", rtype = "p", ctype = "p", cylab = "Daily effects", rylab = "Hourly effects", rxlab = "Hour", cxlab = "Day") # ====================================================================== # Quasi-variances example. # Ships data, a simple Poisson regression. # See Section 5.3 of Yee and Hadi (2014). data("ships", package = "MASS") # Get the 'ships' data from MASS ships$Year <- as.factor(ships$year) # A factor version of years ships$Period <- as.factor(ships$period) # A factor version of period Shipmodel <- vglm(incidents ~ type + Year + Period, quasipoissonff, data = ships, subset = (service > 0), trace = TRUE, offset = log(service)) fit1 <- rcim(Qvar(Shipmodel, "type"), uninormal("explink"), maxit = 101) quasiVar <- qvar(fit1) # Same as diag(predict(fit1)[, c(TRUE, FALSE)]) / 2 quasiSE <- sqrt(quasiVar) quasiVar quasiSE # 5% LSD intervals (arrows) based on quasi-variances. par(mfrow = c(1, 1), mar = c(4.1, 4.3, 0.1, 0.1) + 0.1) plotqvar(fit1, scol = "blue", pch = 16, slwd = 1.5, las = 1, length.arrows = 0.07) # ====================================================================== # Quasi-variances example. # Zero-inflated Poisson (ZIP) example. # xs.nz data subset, with babies as a response. # See Section 5.4 of Yee and Hadi (2014). # Select the subset xs.nz.f <- subset(xs.nz, sex == "F") # Only females xs.nz.f <- subset(xs.nz.f, !is.na(babies) & !is.na(age) & !is.na(ethnic)) xs.nz.f <- xs.nz.f[xs.nz.f$babies != "-", ] # Delete this one person xs.nz.f$babies <- as.numeric(as.character(xs.nz.f$babies)) xs.nz.f <- subset(xs.nz.f, babies >= 0) xs.nz.f <- subset(xs.nz.f, as.numeric(as.character(ethnic)) <= 2) # Not a Poisson, looks like a zero-inflated Poisson with(xs.nz.f, barplot(table(babies), las = 1, ylab = "Frequency", xlab = "Babies")) # Fit a zero-inflated Poisson with smooth functions of age clist <- list("bs(age, df = 4)" = rbind(1, 0), "bs(age, df = 3)" = rbind(0, 1), "ethnic" = diag(2), "(Intercept)" = diag(2)) fit1.qvzip <- vglm(babies ~ bs(age, df = 4) + bs(age, df = 3) + ethnic, zipoissonff(zero = NULL), data = xs.nz.f, trace = TRUE, constraints = clist) # Compute and plot the Quasi-SE plots for a zero-inflated Poisson # model fitted to a subset of females from \texttt{xs.nz} with babies # as the response. Fit1.qvzip <- rcim(Qvar(fit1.qvzip, "ethnic", which = 1), uninormal("explink"), maxit = 99, trace = TRUE) Fit2.qvzip <- rcim(Qvar(fit1.qvzip, "ethnic", which = 2), uninormal("explink"), maxit = 99, trace = TRUE) # Plot the quasi-SEs par(mfrow = c(1, 2), mar = c(4, 4.5, 0.9, 0.9) + 0.1) plotqvar(Fit1.qvzip, scol = "blue", pch = 16, main = expression(eta[1]), slwd = 1.5, las = 1, length.arrows = 0.07) plotqvar(Fit2.qvzip, scol = "blue", pch = 16, main = expression(eta[2]), slwd = 1.5, las = 1, length.arrows = 0.07) # ====================================================================== # Goodman's RC-association model (GRC model) and # unconstrained quadratic ordination (UQO). # This example shows how fitting a rank-1 GRC to Poisson counts is # one method for performing a rank-1 UQO with species having # equal tolerances. # See Section 5.5 of Yee and Hadi (2014). # The results easily generalize to higher dimensions. set.seed(123) # For reproducibility n <- 100; p <- 5; S <- 5 # S is the number of species pdata <- rcqo(n, p, S, es.opt = FALSE, eq.max = FALSE, eq.tol = FALSE, sd.tolerances = 1, sd.latvar = 0.75) # Random X and Y true.nu <- attr(pdata, "latvar") # The 'truth' Y <- pdata[, paste0("y", 1:S)] # Y matrix (n x S) uqo.rcim1 <- rcim(Y, Rank = 1, str0 = NULL, # Delta covers entire n x M matrix iindex = 1:nrow(Y), # RRR covers the entire Y has.intercept = FALSE) # Suppress the intercept # Plot the results par(mfrow = c(2, 2), mar = c(4.5, 4.5, 0.9, 0.9) + 0.1) # Plot "(a)" for the optimums plot(attr(pdata, "optima"), Coef(uqo.rcim1)@A, col = "blue", type = "p", main = "(a)") mylm <- lm(Coef(uqo.rcim1)@A ~ attr(pdata, "optima")) abline(coef = coef(mylm), col = "orange", lty = "dashed") # Plot "(b)" for the site scores fill.val <- NULL # Choose this for the new parameterization plot(attr(pdata, "latvar"), c(fill.val, concoef(uqo.rcim1)), las = 1, col = "blue", type = "p", main = "(b)") mylm <- lm(c(fill.val, concoef(uqo.rcim1)) ~ attr(pdata, "latvar")) abline(coef = coef(mylm), col = "orange", lty = "dashed") # Plot unequal-tolerances CQOs. # Plot "(c)" is the 'truth'. # Plot "(d)" is for the UQO site scores and should be similar to (c). myform <- attr(pdata, "formula") p1ut <- cqo(myform, family = poissonff, eq.tol = FALSE, trace = FALSE, data = pdata) c1ut <- cqo(cbind(y1, y2, y3, y4, y5) ~ scale(latvar(uqo.rcim1)), family = poissonff, eq.tol = FALSE, trace = FALSE, data = pdata) lvplot(p1ut, lcol = 1:S, y = TRUE, pcol = 1:S, pch = 1:S, pcex = 0.5, main = "(c)") lvplot(c1ut, lcol = 1:S, y = TRUE, pcol = 1:S, pch = 1:S, pcex = 0.5, main = "(d)") # ====================================================================== # Rasch model example. # Note: requires \pkg{VGAMdata} version 0.9-4 or later. summary(exam1) # The names of the students are in the first column # Fit a simple Rasch model. # First, remove all questions and people who were totally correct or wrong Exam1.1 <- exam1 [, c(TRUE, colMeans(exam1 [, -1]) > 0)] Exam1.1 <- Exam1.1[, c(TRUE, colMeans(Exam1.1[, -1]) < 1)] Exam1.1 <- Exam1.1[rowMeans(Exam1.1[, -1]) > 0, ] Exam1.1 <- Exam1.1[rowMeans(Exam1.1[, -1]) < 1, ] rdata <- Exam1.1 Y.matrix <- rdata[, -1] # Exclude the names of the students # Fit the simple Rasch model. # Argument 'mv' renamed to 'multiple.responses': rfit <- rcim(Y.matrix, family = binomialff(multiple.responses = TRUE), trace = TRUE) coef(rfit) # Row and column effects constraints(rfit, matrix = TRUE) # Constraint matrices side-by-side dim(model.matrix(rfit, type = "vlm")) # 'Big' VLM matrix # This plot shows the (main) row and column effects par(mfrow = c(1, 2), las = 1, mar = c(4.5, 4.4, 2, 0.9) + 0.1) rfit.extra <- plot(rfit, rcol = "blue", ccol = "orange", cylab = "Item effects", rylab = "Person effects", rxlab = "", cxlab = "") names(rfit.extra@post) # Some useful output put here cbind(rfit.extra@post$row.effects) cbind(rfit.extra@post$raw.row.effects) round(cbind(-rfit.extra@post$col.effects), dig = 3) round(cbind(-rfit.extra@post$raw.col.effects), dig = 3) round(matrix(-rfit.extra@post$raw.col.effects, ncol = 1, # Rename for humans dimnames = list(colnames(Y.matrix), NULL)), dig = 3) # ======================================================================