# Script of code for running the azpro example. # Section 7.2 of "Reduced-rank vector generalized linear models with two # linear predictors" # by T. W. Yee, appearing in Computational Statistics & Data Analysis. # Last modified: # 20120206; 20120730; 20130104; 20130116; 20130206, 20131002 # Assumes the following packages have been installed: # 1. VGAM 0.9-2 or higher, currently at # http://www.stat.auckland.ac.nz/~yee/VGAM/prerelease # and CRAN. # 2. VGAMdata 0.9-2 or higher, currently at CRAN. # 3. COUNT. library("VGAM") library("VGAMdata") # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Get the data ready data("azpro", package = "COUNT") summary(azpro) dim(azpro) # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Now fit some regressions ## Standard Poisson regression --------------------------------------- fit.p <- vglm(los ~ procedure + sex + admit + age75, poissonff, data = azpro, trace = TRUE) summary(fit.p) ## Quasi-likelihood Poisson (QLP) regression ------------------------- fit.qlp <- vglm(los ~ procedure + sex + admit + age75, quasipoissonff, data = azpro, trace = TRUE) summary(fit.qlp) # Method of moments used to estimate phi ## NB-1 -------------------------------------------------------------- fit.nb1 <- vglm(los ~ procedure + sex + admit + age75, negbinomial(parallel = TRUE, zero = NULL), data = azpro, trace = TRUE) coef(fit.nb1, matrix = TRUE) Confint.nb1(fit.nb1) ## NB-2 (k intercept-only) ------------------------------------------- fit.nb2 <- vglm(los ~ procedure + sex + admit + age75, negbinomial, data = azpro, trace = TRUE) coef(fit.nb2, matrix = TRUE) summary(fit.nb2) ## NB-H -------------------------------------------------------------- fit.nbh <- vglm(los ~ procedure + sex + admit + age75, negbinomial(zero = NULL), data = azpro, trace = TRUE) coef(fit.nbh, matrix = TRUE) summary(fit.nbh) ## RR-NB == NB-P ----------------------------------------------------- fit.rrnb <- rrvglm(los ~ procedure + sex + admit + age75, negbinomial(zero = NULL), data = azpro, trace = TRUE) coef(fit.rrnb, matrix = TRUE) Coef(fit.rrnb) summary(fit.rrnb) Confint.rrnb(fit.rrnb) # Shows it is not NB-1 or NB-2 !! logLik(fit.rrnb) # LRT (manually) pchisq(2 * (logLik(fit.rrnb) - logLik(fit.nb2)), df = df.residual(fit.nb2) - df.residual(fit.rrnb), lower.tail = FALSE) # LRT (easy way) lrtest(fit.nb2, fit.rrnb) # Extract out the coeffs, SEs, Wald statistics. betas <- cbind("NB-2 coeffs" = coef(fit.nb2, matrix = TRUE)[, 1], "NB-P coeffs" = coef(fit.rrnb, matrix = TRUE)[, 1]) round(betas, 3) stderrs <- cbind("NB-2 SEs" = sqrt(diag(vcov(fit.nb2)))[-2], "NB-P SEs" = sqrt(diag(vcov(fit.rrnb)))[-c(1, 2+1)]) round(stderrs, 3) walds <- betas / stderrs colnames(walds) <- c("NB-2 Walds", "NB-P Walds") round(walds, 1) # -------------------------------------------------------------------- # Plot the abundances and fitted values against the latent variable par(mar = c(5.1, 4.1, 1.3, 0.3), oma = rep(0, len = 4), mfrow = c(1, 1), las = 1, xpd = FALSE) set.seed(123) plot(jitter(los, factor = 0) ~ jitter(lv(fit.rrnb), factor = 90), data = azpro, col = "blue", ylab = "Length of stay", xlab = "Latent variable (jittered)", las = 1) ooo <- order(lv(fit.rrnb)) lines(fitted(fit.rrnb)[ooo] ~ lv(fit.rrnb)[ooo], col = "darkorange", lwd = 2) # dev.off() # ---------------------------------------------------------------------- # Plot the loglik as a function of a21 (profile likelihood): par(mar = c(5.1, 4.7, 1.3, 0.3), oma = rep(0, len = 4), mfrow = c(1, 1), las = 1, xpd = FALSE) par(mgp = c(3.5, 1, 0), las = 1) # @post$a21.matrix is assigned. Rather expensive. fit.rrnb <- plota21(fit.rrnb, plot.it = TRUE, se.eachway = c(6.5, 5), nseq.a21 = 31, trace.arg = TRUE) fit.rrnb@post # Use interpolation to estimate the log-likelihood at 0 and 1. ell.at.01 <- approx(x = fit.rrnb@post$a21.matrix[, 1], y = fit.rrnb@post$a21.matrix[, 2], xout = 0:1) # Add some more info to the plot. points(ell.at.01$x, ell.at.01$y, pch = 20, cex = 1.1, col = "black") text(ell.at.01$x[1], ell.at.01$y[1], "NB-2", adj = -0.3) text(ell.at.01$x[2], ell.at.01$y[2], "NB-1", adj = 1.3) answer <- Confint.rrnb(fit.rrnb) shift.down <- 1.3 text(answer$a21.hat, logLik(fit.rrnb) - shift.down, "RR-NB", adj = 0.5) # "NB-P" points(answer$a21.hat, logLik(fit.rrnb), pch = 20, cex = 1.1, col = "black") # dev.off() # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Fit a new model: a RR-Positive-NB fit.rrposnb <- rrvglm(los ~ procedure + sex + admit + age75, posnegbinomial(zero = NULL), data = azpro, trace = TRUE) summary(fit.rrposnb) coef(fit.rrposnb, matrix = TRUE) Coef(fit.rrposnb) logLik(fit.rrposnb) - logLik(fit.rrnb) # Large positive value # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,