# Script of code for running the nmes example. # Section 7.3 of "Reduced-rank vector generalized linear models with two # linear predictors" # by T. W. Yee, appearing in Computational Statistics & Data Analysis. # Last modified: # 20120206; 20120730; 20130104; 20130116; 20130206, 20131002 # Assumes the following packages have been installed: # 1. VGAM 0.9-2 or higher, currently at # http://www.stat.auckland.ac.nz/~yee/VGAM/prerelease # and CRAN. # 2. VGAMdata 0.9-2 or higher, currently at CRAN. # 3. AER. library("VGAM") library("VGAMdata") # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Get the data ready ########################################## ## Demand for medical care in NMES 1988 ## ########################################## # Initially code was from # http://127.0.0.1:29029/library/AER/html/CameronTrivedi1998.html ## select variables for analysis data("NMES1988", package = "AER") head(NMES1988) nmes <- NMES1988[, -(2:6)] ## dependent variable ## Table 6.1 table(cut(nmes$visits, c(0:13, 100)-0.5, labels = 0:13)) dim(nmes) head(nmes) # No 'hospital' variable # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Now fit some regressions ## Standard Poisson regression fit.pois <- vglm(visits ~ ., poissonff, data = nmes, trace = TRUE) summary(fit.pois) ## NegBin regression (NB-2) fit.nb2 <- vglm(visits ~ ., negbinomial, data = nmes, trace = TRUE) ## NB-P: better set.seed(1) fit.rrnb <- rrvglm(visits ~ ., negbinomial(zero = NULL), data = nmes, trace = TRUE) coef(fit.rrnb, matrix = TRUE) Coef(fit.rrnb) # Likelihood ratio test lrtest(fit.nb2, fit.rrnb) # Easy way 2 * (logLik(fit.rrnb) - logLik(fit.nb2)) Confint.rrnb(fit.rrnb) # Confidence interval for delta2 is here summary(fit.rrnb) head(predict(fit.rrnb)) tail(predict(fit.rrnb)) constraints(fit.rrnb) ## NB-1: fit.nb1 <- vglm(visits ~ ., negbinomial(parallel = TRUE, zero = NULL), data = nmes, trace = TRUE) lrtest(fit.nb1, fit.rrnb) Confint.nb1(fit.nb1) ## ZA-NB with size=intercept-only(is better than NB-1) ,,,,,,,,,,,,,,,,,, ## Same as Cameron and Trivedi, Table 6.8, p.204. fit.zanb <- vglm(visits ~ ., zanegbinomial(zero = 3), data = nmes, trace = TRUE) # Same as Cameron and Trivedi, Table 6.8, p.204: 1/exp(0.2959866) is 1/size coef(fit.zanb, matrix = TRUE) summary(fit.zanb) fit.zanb # ZI-NB (is not as good as ZA-NB) ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, fit.zinb <- vglm(visits ~ ., zinegbinomial(zero = 3, nsimEIM = 500), data = nmes, trace = TRUE) coef(fit.zinb, matrix = TRUE) # fit.zinb logLik(fit.zinb) # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # Full ZA-NB (is better than ZA-NB with size=intercept-only) # Called a ZA-NB-H in my paper (Table 5). Fit.zanb <- vglm(visits ~ ., zanegbinomial(zero = NULL, nsimEIM = 500), # epsilon = 1e-8, data = nmes, trace = TRUE) round(coef(Fit.zanb, matrix = TRUE), 3) Fit.zanb logLik(Fit.zanb) - c(logLik(fit.zanb)) # LRT shows Fit.zanb is better than fit.zanb. lrtest(fit.zanb, Fit.zanb) summary(Fit.zanb) # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, # Fit some intermediary models where some explanatory variables # appear in one linear/additive predictors only. # Here is a NB-H fit.nbh1 <- vglm(visits ~ ., negbinomial(zero = NULL), data = nmes, trace = TRUE) coef(fit.nbh1, matrix = TRUE) summary(fit.nbh1) # Shows school is not significant for size # So fit a simplified model ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, clist <- list("(Intercept)" = diag(2), # "hospital" = rbind(1, 0), # Remove 'hospital' from 'size' "health" = diag(2), "chronic" = diag(2), "gender" = diag(2), "school" = rbind(1, 0), # Remove 'school' from 'size' "insurance" = diag(2)) fit.nbh2 <- vglm(visits ~ ., negbinomial(zero = NULL), constraints = clist, etastart = predict(fit.nbh1), # Converges faster data = nmes, trace = TRUE) summary(fit.nbh2) # Shows 'everything' is significant. # Likelihood ratio test: shows the simplified model is okay lrtest(fit.nbh1, fit.nbh2) # Easy way pchisq(2 * (logLik(fit.nbh1) - logLik(fit.nbh2)), df = df.residual(fit.nbh2) - df.residual(fit.nbh1), lower.tail = FALSE) # Manual way # ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,