# Analysis of the 2008 WFFC data
# Concentrates on the log-linear modelling via quasipoissonff() and negbinomial()
# Most analyses are run on the rivers only. Lakes are excluded in most cases.
# T. W. Yee (2013)


# ======================================================================
# Read the raw data in

library("VGAM")      # Requires Version 0.9-2 or later
library("VGAMdata")  # Requires Version 0.9-2 or later


data(wffc)
summary(wffc)
dim(wffc)


data(wffc.nc)


# ======================================================================
# Use quasipoissonff() to fit a log-linear model.

# Nb. "fnc" stands for factor version of "wffc.nc"
fnc <- transform(wffc.nc,
                 finame = factor(iname),
                 fsector = factor(sector),
                 fday = factor(ceiling(session / 2)),
                 mornaft = 1 - (session %% 2),
                 fbeatboat = factor(beatboat))

head(fnc)
nrow(fnc)


# -----------------------------------------------
# Delete people who fished less than 5 sessions?
nrow(fnc)
fnc <- fnc[with(fnc, !is.element(comid, c(99,72,80,93,45,71,97,78))),] 
nrow(fnc)   # Should be 455

# Exclude the lakes (sectors 2 and 3):
  fnc.river <- fnc[abs(fnc$sector - 2.5) > 1,]

# -----------------------------------------------

# Fit a quasipoissonff() model

# Notes:
# 1. Whanganui is the baseline river.
# 2. Could use fday or day.
# 3. Just to get phi-hat 

Fit.qp <- vglm(numbers ~ fsector / fbeatboat + mornaft + fday + finame,
              quasipoissonff, data = fnc.river, trace = TRUE)
summary(Fit.qp) # Get phi-hat.
AIC(Fit.qp)
length(Fit.qp@y)
length(coef(Fit.qp))
# Ratio of # of observations to # of parameters
length(Fit.qp@y) / length(coef(Fit.qp))


# Save part of the object
wffcdir <- "/tmp/"  # Linux
coef.Fit.qp.vglm.quasipoissonff <- coef(Fit.qp, mat = TRUE)
save(coef.Fit.qp.vglm.quasipoissonff,
     file = paste(wffcdir, "coef.Fit.qp.vglm.quasipoissonff.Rdata", sep = ""))


# Run these 
(cfit <- coef(Fit.qp))
ncfit <- names(cfit)
sefit <- sqrt(diag(vcov(Fit.qp)))
summary(Fit.qp)
AIC(Fit.qp)


# Graphical checks
# Use glm() to fit the equivalent model, this is because plot.glm() does
# several diagnostic plots, e.g,. involving deviance residuals.
fit.glm.qp <- glm(numbers ~ fsector / fbeatboat +   # Carefully think, esp. for lakes!!
                           mornaft + fday + finame,
                 quasipoisson,
                 data = fnc.river, trace = TRUE)
par(mfrow = c(2,2))
plot(fit.glm.qp, col = "blue")


# (i) Competitor effects; -------------------------------
lcfit <- length(cfit)
index1 <- (1:lcfit)[ncfit == "finameAlessandroSgrani"]
index2 <- (1:lcfit)[ncfit == "finameYoshikoIzumiya"]
comest <- c("finameAaronWest" =  0, cfit[index1:index2])
comest <- comest - mean(comest)
plot(1:length(comest), comest, type = "b",
        xlab = "Competitor", ylab = "Centered effects")
abline(h = 0, col = "blue", lty = "dashed")

slist <- sort.list(comest)
scomest <- comest[slist]
scomest

sd(scomest)


# Normally distributed N(mucomest, sdcomest) -----------------------
hist(scomest, xlab = "Fitted competitor effects",
     border = "blue", main = "", breaks = 11)

fitnorm <- vglm(scomest ~ 1, uninormal, trace = TRUE)
coef(fitnorm, mat = TRUE)
Coef(fitnorm)


# Find out what country each person is in -----------------------

scountryvec <- names(scomest)
slist1 <- sort.list(fnc.river$iname)
# sortednames <- unique(sort(fnc.river$iname))
sortednames <- substr(names(scomest), start = 7,
                     stop = nchar(names(scomest)))
iptr <- 0
for(ii in sortednames) {
    iptr <- iptr + 1
    temp <- fnc.river$country[fnc.river$iname == ii]
    scountryvec[iptr] <- temp[1]
}
scountryvec


cbind(length(scomest):1, scomest, scountryvec)


# Put the scores into a matrix, ranked by order statistic.
rev.scomest <- rev(scomest)

mymat <- matrix(0, nrow = 23, ncol = 5,
               dimnames = list(unique(sort(fnc.river$country)), 
                             as.character(1:5)))
# ranking = length(scomest):1
cptr <- rep(1, len = 23)
for(ii in length(scomest):1) {
    print(ii)
    index1 <- (1:nrow(mymat))[rownames(mymat) == scountryvec[ii]]
    mymat[scountryvec[ii], cptr[index1]] <- scomest[ii]
    cptr[index1] <- cptr[index1] + 1
}

mymat[mymat == 0]  <- NA

mymat <- na.omit(mymat)


par(mfrow = c(1,1), mar = c(5,4,1,6)+0.1, las = 1)
mycol <- (1:(5+1))[-7]  # omits yellow
matplot(1:5, t(mymat), type = "b", col = mycol, lwd = 2, pch = "*", bty = "l",
        xlab = "Intra-team ranking",
        ylab = "Competitor effect")
set.seed(123)
text(x = jitter(rep(5.2, len = nrow(mymat)), amount = 0.001),
     y = jitter(mymat[,5], amount = 0.07),
     labels = rownames(mymat), adj = 0, xpd = TRUE, col = mycol)


# ======================================================================
# Fit a negbinomial model.


Fit.nb <- vglm(numbers ~ fsector / fbeatboat +   # Carefully think, esp. for lakes!!
              mornaft + fday + finame,
              negbinomial,
              data = fnc.river, trace = TRUE)

coef(Fit.nb, matrix = TRUE)[   ,]
(cFit.nb <- coef(Fit.nb))
ncFit.nb <- names(cFit.nb)
seFit.nb <- sqrt(diag(vcov(Fit.nb)))
summary(Fit.nb)
AIC(Fit.nb)


# Fit a smaller model?
# It shows all the terms are necessary.
fit2.nb <- vglm(numbers ~ fsector + finame +
#                        fday +
                         mornaft,
               negbinomial(lmu = loge),
               data = fnc.river, trace = TRUE)
# Likelihood ratio test LRT
pchisq(2*(logLik(Fit.nb)-logLik(fit2.nb)),
       df = df.residual(fit2.nb) - df.residual(Fit.nb), lower.tail = FALSE)

# detach("package:VGAM")

# ======================================================================