# Analysis of the 2008 WFFC data # Concentrates on the log-linear modelling via quasipoissonff() and negbinomial() # Most analyses are run on the rivers only. Lakes are excluded in most cases. # T. W. Yee (2013) # ====================================================================== # Read the raw data in library("VGAM") # Requires Version 0.9-2 or later library("VGAMdata") # Requires Version 0.9-2 or later data(wffc) summary(wffc) dim(wffc) data(wffc.nc) # ====================================================================== # Use quasipoissonff() to fit a log-linear model. # Nb. "fnc" stands for factor version of "wffc.nc" fnc <- transform(wffc.nc, finame = factor(iname), fsector = factor(sector), fday = factor(ceiling(session / 2)), mornaft = 1 - (session %% 2), fbeatboat = factor(beatboat)) head(fnc) nrow(fnc) # ----------------------------------------------- # Delete people who fished less than 5 sessions? nrow(fnc) fnc <- fnc[with(fnc, !is.element(comid, c(99,72,80,93,45,71,97,78))),] nrow(fnc) # Should be 455 # Exclude the lakes (sectors 2 and 3): fnc.river <- fnc[abs(fnc$sector - 2.5) > 1,] # ----------------------------------------------- # Fit a quasipoissonff() model # Notes: # 1. Whanganui is the baseline river. # 2. Could use fday or day. # 3. Just to get phi-hat Fit.qp <- vglm(numbers ~ fsector / fbeatboat + mornaft + fday + finame, quasipoissonff, data = fnc.river, trace = TRUE) summary(Fit.qp) # Get phi-hat. AIC(Fit.qp) length(Fit.qp@y) length(coef(Fit.qp)) # Ratio of # of observations to # of parameters length(Fit.qp@y) / length(coef(Fit.qp)) # Save part of the object wffcdir <- "/tmp/" # Linux coef.Fit.qp.vglm.quasipoissonff <- coef(Fit.qp, mat = TRUE) save(coef.Fit.qp.vglm.quasipoissonff, file = paste(wffcdir, "coef.Fit.qp.vglm.quasipoissonff.Rdata", sep = "")) # Run these (cfit <- coef(Fit.qp)) ncfit <- names(cfit) sefit <- sqrt(diag(vcov(Fit.qp))) summary(Fit.qp) AIC(Fit.qp) # Graphical checks # Use glm() to fit the equivalent model, this is because plot.glm() does # several diagnostic plots, e.g,. involving deviance residuals. fit.glm.qp <- glm(numbers ~ fsector / fbeatboat + # Carefully think, esp. for lakes!! mornaft + fday + finame, quasipoisson, data = fnc.river, trace = TRUE) par(mfrow = c(2,2)) plot(fit.glm.qp, col = "blue") # (i) Competitor effects; ------------------------------- lcfit <- length(cfit) index1 <- (1:lcfit)[ncfit == "finameAlessandroSgrani"] index2 <- (1:lcfit)[ncfit == "finameYoshikoIzumiya"] comest <- c("finameAaronWest" = 0, cfit[index1:index2]) comest <- comest - mean(comest) plot(1:length(comest), comest, type = "b", xlab = "Competitor", ylab = "Centered effects") abline(h = 0, col = "blue", lty = "dashed") slist <- sort.list(comest) scomest <- comest[slist] scomest sd(scomest) # Normally distributed N(mucomest, sdcomest) ----------------------- hist(scomest, xlab = "Fitted competitor effects", border = "blue", main = "", breaks = 11) fitnorm <- vglm(scomest ~ 1, uninormal, trace = TRUE) coef(fitnorm, mat = TRUE) Coef(fitnorm) # Find out what country each person is in ----------------------- scountryvec <- names(scomest) slist1 <- sort.list(fnc.river$iname) # sortednames <- unique(sort(fnc.river$iname)) sortednames <- substr(names(scomest), start = 7, stop = nchar(names(scomest))) iptr <- 0 for(ii in sortednames) { iptr <- iptr + 1 temp <- fnc.river$country[fnc.river$iname == ii] scountryvec[iptr] <- temp[1] } scountryvec cbind(length(scomest):1, scomest, scountryvec) # Put the scores into a matrix, ranked by order statistic. rev.scomest <- rev(scomest) mymat <- matrix(0, nrow = 23, ncol = 5, dimnames = list(unique(sort(fnc.river$country)), as.character(1:5))) # ranking = length(scomest):1 cptr <- rep(1, len = 23) for(ii in length(scomest):1) { print(ii) index1 <- (1:nrow(mymat))[rownames(mymat) == scountryvec[ii]] mymat[scountryvec[ii], cptr[index1]] <- scomest[ii] cptr[index1] <- cptr[index1] + 1 } mymat[mymat == 0] <- NA mymat <- na.omit(mymat) par(mfrow = c(1,1), mar = c(5,4,1,6)+0.1, las = 1) mycol <- (1:(5+1))[-7] # omits yellow matplot(1:5, t(mymat), type = "b", col = mycol, lwd = 2, pch = "*", bty = "l", xlab = "Intra-team ranking", ylab = "Competitor effect") set.seed(123) text(x = jitter(rep(5.2, len = nrow(mymat)), amount = 0.001), y = jitter(mymat[,5], amount = 0.07), labels = rownames(mymat), adj = 0, xpd = TRUE, col = mycol) # ====================================================================== # Fit a negbinomial model. Fit.nb <- vglm(numbers ~ fsector / fbeatboat + # Carefully think, esp. for lakes!! mornaft + fday + finame, negbinomial, data = fnc.river, trace = TRUE) coef(Fit.nb, matrix = TRUE)[ ,] (cFit.nb <- coef(Fit.nb)) ncFit.nb <- names(cFit.nb) seFit.nb <- sqrt(diag(vcov(Fit.nb))) summary(Fit.nb) AIC(Fit.nb) # Fit a smaller model? # It shows all the terms are necessary. fit2.nb <- vglm(numbers ~ fsector + finame + # fday + mornaft, negbinomial(lmu = loge), data = fnc.river, trace = TRUE) # Likelihood ratio test LRT pchisq(2*(logLik(Fit.nb)-logLik(fit2.nb)), df = df.residual(fit2.nb) - df.residual(Fit.nb), lower.tail = FALSE) # detach("package:VGAM") # ======================================================================