# Script of code for running the hormone assay example.
# Section 7.1 of "Reduced-rank vector generalized linear models with two
# linear predictors"
# by T. W. Yee, appearing in Computational Statistics & Data Analysis.


# Last modified:
# 20120730; 20121222; 20130104; 20130116, 20131002, 20140519


# Assumes the following packages have been installed:
# 1. VGAM 0.9-4 or higher. The latest version is at
#    http://www.stat.auckland.ac.nz/~yee/VGAM/prerelease
#    (and possibly CRAN).
# 2. VGAMdata 0.9-4 or higher. See http://www.stat.auckland.ac.nz/~yee
#    for the latest version (and possibly CRAN).


# Notes:
# 1. See also the help file for hormone for more examples, i.e.,
#  type "help(hormone)"
#  after typing "library(VGAM)"


library("VGAM")
library("VGAMdata")


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Read the data in

data("hormone", package = "VGAM")  # In VGAM 0.8-8 or higher

with(hormone, plot(X, Y, col = "blue"))

dim(hormone)

summary(hormone)


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Fit some regressions


# The best model.
modelI <-
  rrvglm(Y ~ 1 + X,
         uninormal(zero = NULL, lsd = "identitylink", imethod = 2),
         data = hormone, trace = TRUE)


# Alternative way to fit modelI
modelI.other <- vglm(Y ~ 1 + X,
                     uninormal(zero = NULL, lsd = "identitylink"),
                     data = hormone, trace = TRUE)


# Do some checking
max(abs(fitted(modelI.other) - fitted(modelI)))  # Should be 0
max(abs(coef(modelI.other, matrix = TRUE) -
        coef(modelI, matrix = TRUE)))  # Should be 0


# Inferior to modelI
modelII <- vglm(Y ~ 1 + X,
                family = uninormal(zero = NULL),
                data = hormone,
                trace = TRUE)


logLik(modelI)
logLik(modelII)  # Less than logLik(modelI)


Coef(modelI)


coef(modelI, matrix = TRUE)


summary(modelI)  # See if a21 is significant or not


head(predict(modelI))
tail(predict(modelI))


# Obtain a 95 percent prediction interval ,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Formula is in, e.g., Faraway, 2005, "LMs With R", p.37.

Xlm <- model.matrix(modelI, type = "lm")
nnn <- nrow(Xlm)
ppp <- nnn * modelI@misc$M - df.residual(modelI)
predse <- Xlm %*% solve( t(Xlm) %*% Xlm ) %*% t(Xlm)
predse <- predict(modelI)[, "sd"] * qt(1 - 0.05/2, df = nnn - ppp) *
          sqrt(1 + diag(predse))


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
# Plot the data and modelI

par(mfrow = c(1, 2), las = 1, xpd = FALSE, mar = c(4.0, 4.1, 0.3, 0.3),
    oma = rep(0, len = 4))


# (a) Plot the data and fitted values
with(hormone, plot(X, Y, las = 1, xlab = "x", ylab = "y",
                   col = "blue"))
lines(with(hormone, X), fitted(modelI), col = "darkorange")
lines(with(hormone, X), fitted(modelI) + predse,
      col = "darkorange", lty = "dashed")
lines(with(hormone, X), fitted(modelI) - predse,
      col = "darkorange", lty = "dashed")

legend("topleft", leg = "(a)", bty = "n", cex = 1.2)


# (b) Plot the data and fitted values (but zoomed in)
with(hormone, plot(X, Y, las = 1, xlab = "x", ylab = "",
                  xlim = c(0, 13.0),
                  ylim = c(0, 10),
                  col = "blue"))
lines(with(hormone, X), fitted(modelI), col = "darkorange")

# Add 95 percent prediction interval to the fitted values,
# cf. Carroll and Ruppert, 1988, Section 2.9.5.
lines(with(hormone, X), fitted(modelI) + predse,
      col = "darkorange", lty = "dashed")
lines(with(hormone, X), fitted(modelI) - predse,
      col = "darkorange", lty = "dashed")

legend("topleft", leg = "(b)", bty = "n", cex = 1.2)


# ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,