# Script of code for running the azpro example.
# Section 7.2 of "Reduced-rank vector generalized linear models with two
# linear predictors"
# by T. W. Yee, appearing in Computational Statistics & Data Analysis.


# Last modified:
# 20120206; 20120730; 20130104; 20130116; 20130206, 20131002


# Assumes the following packages have been installed:
# 1. VGAM 0.9-2 or higher, currently at
#    http://www.stat.auckland.ac.nz/~yee/VGAM/prerelease
#    and CRAN.
# 2. VGAMdata 0.9-2 or higher, currently at CRAN.
# 3. COUNT.


library("VGAM")
library("VGAMdata")


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Get the data ready

data("azpro", package = "COUNT")

summary(azpro)

dim(azpro)


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Now fit some regressions


## Standard Poisson regression ---------------------------------------
fit.p <- vglm(los ~ procedure + sex + admit + age75,
              poissonff, data = azpro, trace = TRUE)
summary(fit.p)


## Quasi-likelihood Poisson (QLP) regression -------------------------
fit.qlp <- vglm(los ~ procedure + sex + admit + age75,
                quasipoissonff, data = azpro, trace = TRUE)
summary(fit.qlp)  # Method of moments used to estimate phi


## NB-1 --------------------------------------------------------------
fit.nb1 <- vglm(los ~ procedure + sex + admit + age75,
                negbinomial(parallel = TRUE, zero = NULL),
                data = azpro, trace = TRUE)
coef(fit.nb1, matrix = TRUE)
Confint.nb1(fit.nb1)


## NB-2 (k intercept-only) -------------------------------------------
fit.nb2 <- vglm(los ~ procedure + sex + admit + age75,
                negbinomial, data = azpro, trace = TRUE)


coef(fit.nb2, matrix = TRUE)
summary(fit.nb2)


## NB-H --------------------------------------------------------------
fit.nbh <- vglm(los ~ procedure + sex + admit + age75,
                negbinomial(zero = NULL),
                data = azpro, trace = TRUE)

coef(fit.nbh, matrix = TRUE)

summary(fit.nbh)


## RR-NB == NB-P -----------------------------------------------------
fit.rrnb <- rrvglm(los ~ procedure + sex + admit + age75,
                   negbinomial(zero = NULL),
                   data = azpro, trace = TRUE)


coef(fit.rrnb, matrix = TRUE)


Coef(fit.rrnb)


summary(fit.rrnb)


Confint.rrnb(fit.rrnb)  # Shows it is not NB-1 or NB-2  !!


logLik(fit.rrnb)


# LRT (manually)
pchisq(2 * (logLik(fit.rrnb) - logLik(fit.nb2)),
       df = df.residual(fit.nb2) - df.residual(fit.rrnb),
       lower.tail = FALSE)


# LRT (easy way)
lrtest(fit.nb2, fit.rrnb)


# Extract out the coeffs, SEs, Wald statistics.
betas <- cbind("NB-2 coeffs" = coef(fit.nb2,  matrix = TRUE)[, 1],
               "NB-P coeffs" = coef(fit.rrnb, matrix = TRUE)[, 1])
round(betas, 3)


stderrs <- cbind("NB-2 SEs" = sqrt(diag(vcov(fit.nb2)))[-2],
                 "NB-P SEs" = sqrt(diag(vcov(fit.rrnb)))[-c(1, 2+1)])
round(stderrs, 3)


walds <- betas / stderrs
colnames(walds) <- c("NB-2 Walds", "NB-P Walds")
round(walds, 1)


# --------------------------------------------------------------------
# Plot the abundances and fitted values against the latent variable


par(mar = c(5.1, 4.1, 1.3, 0.3), oma = rep(0, len = 4),
    mfrow = c(1, 1), las = 1, xpd = FALSE)

set.seed(123)

plot(jitter(los, factor = 0) ~ jitter(lv(fit.rrnb), factor = 90),
     data = azpro, col = "blue",
     ylab = "Length of stay",
     xlab = "Latent variable (jittered)", las = 1)
ooo <- order(lv(fit.rrnb))
lines(fitted(fit.rrnb)[ooo] ~ lv(fit.rrnb)[ooo],
      col = "darkorange", lwd = 2)


# dev.off()


# ----------------------------------------------------------------------
# Plot the loglik as a function of a21 (profile likelihood):

par(mar = c(5.1, 4.7, 1.3, 0.3), oma = rep(0, len = 4),
    mfrow = c(1, 1), las = 1, xpd = FALSE)
par(mgp = c(3.5, 1, 0), las = 1)


# @post$a21.matrix is assigned. Rather expensive.
fit.rrnb <- plota21(fit.rrnb, plot.it = TRUE,
                    se.eachway = c(6.5, 5), nseq.a21 = 31,
                    trace.arg = TRUE)
fit.rrnb@post


# Use interpolation to estimate the log-likelihood at 0 and 1.
ell.at.01 <- approx(x = fit.rrnb@post$a21.matrix[, 1],
                    y = fit.rrnb@post$a21.matrix[, 2],
                    xout = 0:1)


# Add some more info to the plot.
points(ell.at.01$x, ell.at.01$y, pch = 20, cex = 1.1, col = "black")
text(ell.at.01$x[1], ell.at.01$y[1], "NB-2", adj = -0.3)
text(ell.at.01$x[2], ell.at.01$y[2], "NB-1", adj =  1.3)
answer <- Confint.rrnb(fit.rrnb)
shift.down <- 1.3
text(answer$a21.hat, logLik(fit.rrnb) - shift.down, "RR-NB",
     adj =  0.5)  # "NB-P"
points(answer$a21.hat, logLik(fit.rrnb), pch = 20,
       cex = 1.1, col = "black")

# dev.off()


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Fit a new model: a RR-Positive-NB


fit.rrposnb <- rrvglm(los ~ procedure + sex + admit + age75,
                      posnegbinomial(zero = NULL),
                      data = azpro, trace = TRUE)


summary(fit.rrposnb)


coef(fit.rrposnb, matrix = TRUE)


Coef(fit.rrposnb)


logLik(fit.rrposnb) - logLik(fit.rrnb)  # Large positive value


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,