bootstrap.func <- function(sp, defree = 10) { # bootstrap.func: # Outputs the index curve for a single bootstrap replicate. # Arguments are the species data frame sp, # with column headers marked "site", "year", and "count"; # and the required degrees of freedom for the fit, defree. # # example: species data frame is called cb (for common bird): # call: bootstrap.func(cb, 10) # but generally this function is only called from within the outer # bootstrap routines: i.e. the BATCH file bootstrap.in, or # the Splus function outer.boot.func. # # NOTE: SOME PARTS OF THIS FUNCTION ARE SPECIFIC TO THE GAM # FITTED ON count ~ as.factor(site) + s(year). # IF USING A DIFFERENT FORMULA, NEED TO MODIFY CODE: ESPECIALLY SEE # NOTE MARKED *** BELOW. # # first obtain the resampled data: assign("sp", sp, frame = 1) uniq.site <- unique(sp$site) Nentries <- length(sp$count) assign("Nentries", Nentries, frame = 1) Nyears <- length(unique(sp$year)) assign("Nyears", Nyears, frame = 1) Nsites <- length(uniq.site) # # take sample of sites to be included in the resample # (bootstrap across sites): sam <- sample(uniq.site, replace = T) # # elements.func lists the rows of the data frame sp that need to # be included in the resample, for an individual site x in the sample sam: elements.func <- function(x) { (1:Nentries)[sp$site == x] } elements <- lapply(sam, elements.func) # # elements is the set of rows of the data frame to be included in the # resample (with repetitions listed separately). It is a ragged list. dr.site <- rep(1:Nsites, as.vector(unlist(lapply(elements, length)))) # # dr.site is the vector of site levels for the data resample: these should # go from 1 to Nsites even though some sites have been included in the # resample more than once, because we want to fit the model to the # repetitions as if they were different sites. However, year and count # for the resample should be extracted directly from the original data # frame, sp. # elements <- as.vector(unlist(elements)) data.resample <- data.frame(site = dr.site, year = sp$year[elements], count = sp$count[elements]) # # fit GAM on this df assign("data.resample", data.resample, frame = 1) formula.name <- substitute(count ~ as.factor(site) + s(year, df = defree1) - 1, list(defree1 = defree)) # # this tango with formula.name is necessary so that both function gam() # and predict.gam() understand how to find the value of defree. dr.gam <- gam(formula.name, family = poisson(link = log), data = data.resample) # # dr.gam is the GAM on the data resample. year.base <- min(sp$year) - 1 dr.new <- data.frame(year = year.base+seq(1, Nyears), site = rep(min(dr.site), Nyears)) # # dr.new is a new data frame, made deliberately as small as possible to # extract the indices as quickly as possible. result <- predict.gam(dr.gam, type = "terms", terms = labels(dr.gam)[2], newdata = dr.new) # # *** IF CHANGING THE ORDER OF VARIABLES IN formula.name, OR FITTING GAM # WITH COVARIATES, NEED TO ALTER THE STATEMENT terms = labels(dr.gam)[2] # ABOVE *** yeareffect <- as.vector(result) # # this has extracted the year effect terms, s(year, df=defree) for # year ranging from 1 to Nyears indices <- exp(yeareffect)/exp(yeareffect[1]) indices }