bootstrap.func <- function(sp, defree = 10) { # bootstrap.func: # Outputs the index curve for a single bootstrap replicate. # Arguments are the species data frame sp, # with column headers marked "site", "year", and "count"; # and the required degrees of freedom for the fit, defree. # # example: species data frame is called cb (for common bird): # call: bootstrap.func(cb, 10) # but generally this function is only called from within the outer # bootstrap routine, outer.boot.func. # # NOTE: SOME PARTS OF THIS FUNCTION MAY BE SPECIFIC TO THE GAM # FITTED ON count ~ as.factor(site) + s(year). # IF USING A DIFFERENT FORMULA, NEED TO MODIFY CODE: ESPECIALLY SEE # NOTE MARKED *** BELOW. # # first obtain the resampled data: uniq.site <- unique(sp$site) Nentries <- length(sp$count) Nyears <- length(unique(sp$year)) Nsites <- length(uniq.site) # # take sample of sites to be included in the resample # (bootstrap across sites): sam <- sample(uniq.site, replace = T) # # elements.func lists the rows of the data frame sp that need to # be included in the resample, for an individual site x in the sample sam: elements.func <- function(x) { (1:Nentries)[sp$site == x] } elements <- lapply(sam, elements.func) # # elements is the set of rows of the data frame to be included in the # resample (with repetitions listed separately). It is a ragged list. dr.site <- rep(1:Nsites, as.vector(unlist(lapply(elements, length)))) # # dr.site is the vector of site levels for the data resample: these should # go from 1 to Nsites even though some sites have been included in the # resample more than once, because we want to fit the model to the # repetitions as if they were different sites. However, year and count # for the resample should be extracted directly from the original data # frame, sp. # elements <- as.vector(unlist(elements)) data.resample <- data.frame(site = dr.site, year = sp$year[elements], count = sp$count[elements]) # # fit GAM on this df using MGCV library: dr.gam <- gam(count~as.factor(site) + s(year, fx=TRUE, k=defree+1), family = poisson(link = log), data = data.resample) # # dr.gam is the GAM on the data resample. # # Now make out a new data frame consisting of all years (in case some were # omitted from data.resample) and all sites in data.resample. # The predict feature doesn't seem to work unless all sites are included. year.base <- min(sp$year)-1 dr.new <- expand.grid(year=year.base+1:Nyears, site=sort(unique(dr.site))) result <- predict.gam(dr.gam, newdata=dr.new, type = "terms") srow <- length(result[1,]) # *** For old MGCV versions, change line above to # srow <- length(result[,1]) # # srow is the row containing the smooth term in the predict object # # *** IF CHANGING THE ORDER OF VARIABLES IN FORMULA, OR FITTING GAM # WITH COVARIATES, NEED TO CHECK WHETHER IT IS STILL THE SAME ROW REQUIRED *** # pred.allyears <- result[1:Nyears, srow] # *** For old MGCV versions, change line above to # pred.allyears <- result[srow, 1:Nyears] indices <- exp(pred.allyears)/exp(pred.allyears[1]) # indices }