# This file gives some example R code on how to fit # an exchangeable bivariate (cloglog) odds ratio model (EBcOM). # Here are some notes: # 1. Regression splines are used. # 2. Variables "temp1" is the temperature at time 1, # "pH2" is the soil pH at time 2, etc. # "ybin1" is the presence (1) or absence (0) of the species at time 1 etc. # 3. One species is modelled here. # 4. The odds ratio of exp(2) means a positive association between # the presence of the species at both times. # 5. The function NS() is a trick; it ensures that the common # regression spline has the same knots. # 6. The simulated data is modelled as a constrained quadratic ordination, # i.e., symmetric bell-shaped curves. # 7. The fitting uses the xij and form2 arguments of vglm()... see the # documentation at http://www.stat.auckland.ac.nz/~yee/VGAM # 8. This example smooths against the known latent variable. In practice, # one could use optim() to determine the constrained coefficients; # at the current value of the constrained coefficients form thei # latent variable and then fit the EBcOM. # 9. This file was last modified: # 20090523, by Thomas Yee. # 20221025, by Thomas Yee---cloglog changed to "clogloglink". # ====================================================================== library("VGAM") # The VGAM package is required, version 0.7-9 or later. # ====================================================================== # Generate some data nsites <- 1000 # Number of sites spp.opt <- 6 # Species' optimum spp.tol <- 1 # Species' tolerance # Time 1's environment: mydat = data.frame(temp1 = rnorm(nsites, 10, 2), pH1 = rnorm(nsites, 5, 1)) # The temperature and pH at time 2 is a perturbation of time 1's values: mydat = transform(mydat, temp2 = temp1 + rnorm(nsites, sd=0.2), pH2 = pH1 + rnorm(nsites, sd=0.1)) # The constrained coefficients are made up. mydat = transform(mydat, nu1 = 0.2 * temp1 + 0.8 * pH1, nu2 = 0.2 * temp2 + 0.8 * pH2, oratio = exp(2)) # Quadratic ==> constrained quadratic ordination: mydat = transform(mydat, eta1 = 1 - 0.5 * ((nu1 - spp.opt) / spp.tol)^2, eta2 = 1 - 0.5 * ((nu2 - spp.opt) / spp.tol)^2, oratio = exp(2)) mydat = transform(mydat, mu1 = clogloglink(eta1, inverse=TRUE), mu2 = clogloglink(eta2, inverse=TRUE)) # Look at the data head(mydat) summary(mydat) # Generate presence/absence species data: tmat <- with(mydat, rbinom2.or(nsites, mu1=mu1, mu2=mu2, oratio=oratio)) mydat <- transform(mydat, ybin1 = tmat[,1], ybin2 = tmat[,2]) # Look at the data with(mydat, table(ybin1,ybin2)) / nsites # For interest only # ====================================================================== # Various plots of the data, for interest only par(mfrow=c(2,2)) with(mydat, plot(jitter(ybin1) ~ eta1, col="blue")) with(mydat, plot(jitter(ybin2) ~ eta2, col="blue")) with(mydat, plot(jitter(ybin2) ~ jitter(ybin1), col="blue")) tmat2 <- with(mydat, dbinom2.or(mu1=mu1, mu2=mu2, oratio=oratio)) ooo <- with(mydat, order(eta1)) with(mydat, matplot(eta1[ooo], tmat2[ooo,], col=1:4, type="l", ylim=0:1, ylab="Probability", main="Joint probabilities")) # ====================================================================== # Fit the model NS <- function(x, ..., df=3) ns(c(x,...), df=df)[1:length(x),,drop=FALSE] fit1.ebcom = vglm(cbind(ybin1,ybin2) ~ NS(nu1,nu2), fam=binom2.or(lmu="clogloglink", exchangeable=TRUE, zero=3), xij = list(NS(nu1,nu2) ~ NS(nu1,nu2) + NS(nu2,nu1) + NS(nu1)), form2 = ~ NS(nu1,nu2) + NS(nu2,nu1) + NS(nu1) + nu1 + nu2, data=mydat, trace=TRUE) # ====================================================================== # Look at the fit coef(fit1.ebcom, matrix=TRUE) summary(fit1.ebcom) vcov(fit1.ebcom) # The plot should be quadratic par(mfrow=c(1,1)) plotvgam(fit1.ebcom, scol='blue', xlab="nu1", se=TRUE, lcol="darkgreen", llwd=2) # ======================================================================