# This file gives some example R code on how to fit
# an exchangeable bivariate (cloglog) odds ratio model (EBcOM).

# Here are some notes:
# 1. Regression splines are used.
# 2. Variables "temp1" is the temperature at time 1,
#    "pH2" is the soil pH at time 2, etc.
#    "ybin1" is the presence (1) or absence (0) of the species at time 1 etc.
# 3. One species is modelled here.
# 4. The odds ratio of exp(2) means a positive association between
#    the presence of the species at both times.
# 5. The function NS() is a trick; it ensures that the common
#    regression spline has the same knots.
# 6. The simulated data is modelled as a constrained quadratic ordination,
#    i.e., symmetric bell-shaped curves.
# 7. The fitting uses the xij and form2 arguments of vglm()... see the
#    documentation at http://www.stat.auckland.ac.nz/~yee/VGAM
# 8. This example smooths against the known latent variable. In practice,
#    one could use optim() to determine the constrained coefficients;
#    at the current value of the constrained coefficients form thei
#    latent variable and then fit the EBcOM.
# 9. This file was last modified:
#    20090523, by Thomas Yee.
#    20221025, by Thomas Yee---cloglog changed to "clogloglink".


# ======================================================================
library("VGAM")  # The VGAM package is required, version 0.7-9 or later.


# ======================================================================
# Generate some data

nsites <- 1000 # Number of sites
spp.opt <- 6   # Species' optimum
spp.tol <- 1   # Species' tolerance

# Time 1's environment:
mydat = data.frame(temp1 = rnorm(nsites, 10, 2),
                   pH1   = rnorm(nsites,  5, 1))

# The temperature and pH at time 2 is a perturbation of time 1's values:
mydat = transform(mydat,
                  temp2 = temp1 + rnorm(nsites, sd=0.2),
                  pH2   = pH1   + rnorm(nsites, sd=0.1))

# The constrained coefficients are made up.
mydat = transform(mydat,
                  nu1 =  0.2 * temp1 + 0.8 * pH1,
                  nu2 =  0.2 * temp2 + 0.8 * pH2,
                  oratio = exp(2))

# Quadratic ==> constrained quadratic ordination:
mydat = transform(mydat,
                  eta1 =  1 - 0.5 * ((nu1 - spp.opt) / spp.tol)^2,
                  eta2 =  1 - 0.5 * ((nu2 - spp.opt) / spp.tol)^2,
                  oratio = exp(2))
mydat = transform(mydat, mu1 = clogloglink(eta1, inverse=TRUE),
                         mu2 = clogloglink(eta2, inverse=TRUE))
# Look at the data
head(mydat)
summary(mydat)

# Generate presence/absence species data:
tmat <-  with(mydat, rbinom2.or(nsites, mu1=mu1, mu2=mu2, oratio=oratio))
mydat <- transform(mydat, ybin1 = tmat[,1], ybin2 = tmat[,2])

# Look at the data
with(mydat, table(ybin1,ybin2)) / nsites  # For interest only


# ======================================================================
# Various plots of the data, for interest only

par(mfrow=c(2,2))
with(mydat, plot(jitter(ybin1) ~ eta1, col="blue"))
with(mydat, plot(jitter(ybin2) ~ eta2, col="blue"))

with(mydat, plot(jitter(ybin2) ~ jitter(ybin1), col="blue"))

tmat2 <- with(mydat, dbinom2.or(mu1=mu1, mu2=mu2, oratio=oratio))
ooo <- with(mydat, order(eta1))
with(mydat, matplot(eta1[ooo], tmat2[ooo,], col=1:4, type="l", ylim=0:1,
     ylab="Probability", main="Joint probabilities"))


# ======================================================================
# Fit the model

NS <- function(x, ..., df=3) ns(c(x,...), df=df)[1:length(x),,drop=FALSE]

fit1.ebcom = vglm(cbind(ybin1,ybin2) ~ NS(nu1,nu2),
                  fam=binom2.or(lmu="clogloglink", exchangeable=TRUE, zero=3),
                  xij = list(NS(nu1,nu2) ~ NS(nu1,nu2) +
                                           NS(nu2,nu1) +
                                           NS(nu1)),
                  form2 = ~  NS(nu1,nu2) +
                             NS(nu2,nu1) +
                             NS(nu1) +
                             nu1 + nu2,
                  data=mydat, trace=TRUE)


# ======================================================================
# Look at the fit


coef(fit1.ebcom, matrix=TRUE)
summary(fit1.ebcom)
vcov(fit1.ebcom)


# The plot should be quadratic
par(mfrow=c(1,1))
plotvgam(fit1.ebcom, scol='blue', xlab="nu1", se=TRUE,
         lcol="darkgreen", llwd=2)


# ======================================================================