############################################################# # # Functions for Example 2 of paper "Effect of list errors on the # estimate of population size" # ############################## binmat<-function(i, n){ # returns a matrix of binary digits. Rows contain the binary # expansion of the corresponding integer in vector i iv <- matrix(0,length(i), n) for(j in (n:1)) { iv[,j] <- i %% 2 i <- i %/% 2 } iv } ############################### make.log.pAB<-function(k,piA,piB){ # constructs a list containing the components of the # the log of the p-vector and its derivatives with respect to piA and piB # for the case where tags have separate loss probabilities. # piA=logit(prob of A tag loss), similarly for piB pA<-1/(1+exp(-piA)) pB<-1/(1+exp(-piB)) K<-2^k-1 log.p<-as.list(1:K) d.log.pA<-as.list(1:K) d.log.pB<-as.list(1:K) for(cell in (1:K)){ r<-sum(binmat(cell,k)) m<-binmat(0:(4^r-1),2*r) xA<-apply(as.matrix(m[,rep(c(T,F),r)]),1,sum) xB<-apply(as.matrix(m[,rep(c(F,T),r)]),1,sum) log.p[[cell]]<- xA*piA + xB*piB - r*log(1+exp(piA)) - r*log(1+exp(piB)) d.log.pA[[cell]]<-xA-r*pA d.log.pB[[cell]]<-xB-r*pB } list(log.p,d.log.pA,d.log.pB) } ############################################### make.AD.rhs.AB<-function(y,k,X.list,modmat,theta){ # calculates mu, D and V for the quasilikelihood fit # where tag loss probs are different. Returns # t(D)more.penrose(V)D and t(D)more.penrose(V)(y-mu) # for use in the fisher scoring routine solver.AB mu<-0 V<-0 D1<-0 D21<-0 D22<-0 r<-length(theta) piA<-theta[r-1] piB<-theta[r] beta<-theta[1:(r-2)] stuff<-make.log.pAB(k,piA,piB) log.p<-stuff[[1]] d.log.pA<-stuff[[2]] d.log.pB<-stuff[[3]] lambda<-as.vector(exp(modmat%*%beta)) K<-2^k-1 for (j in (1:K)){ Xj<-X.list[[j]] pj<-exp(log.p[[j]]) muj<-as.vector(lambda[j]*Xj%*%pj) mu<-mu + muj V<-V + lambda[j]*(Xj%*%(pj*t(Xj))) D1<-D1 + outer(muj,modmat[j,]) D21<-D21 + lambda[j]*Xj%*%(pj*d.log.pA[[j]]) D22<-D22 + lambda[j]*Xj%*%(pj*d.log.pB[[j]]) } D<-cbind(D1,D21,D22) A <- t(D) %*% moore.penrose(V) list(AD=A%*%D,rhs=A %*% (y - mu)) } ############################################################################### solver.AB<-function(y, k, X.list, modmat, theta, maxiter = 20, tol = 1e-08) { # Solves estimating equation # y: vector containing the 3 observed tables in standard order # X.list: output from makeX.list # modmat: model matrix from poisson regression, output from # SPlus function model.matrix # theta=(beta,pi): starting values del <- 2 * tol iter <- 1 oldS<-sum(abs(theta)) while(tol < sum(abs(del)) & iter <= maxiter) { stuff<-make.AD.rhs.AB(y,k, X.list, modmat, theta) del<-solve(stuff$AD,stuff$rhs) newS<-sum(abs(del)) print("newS") print(newS) while(oldStol]<-1/d[d>tol] stuff$v%*%(t(stuff$u)*d) } ################################################################# make.X.all<-function(k){ # calculates the X matrix for k lists # k must be a positive integer if(k<1||round(k)!=k)stop("k must be a positive integer") rbind(make.X(k,"AB"),make.X(k,"A"),make.X(k,"B")) } ################################################################ make.X<-function(k,type){ # calculates the part of the X-matrix for k lists # ``match on AB'' part: type="AB" # ``match on A'' part: type="A" # ``match on B'' part: type="B" L<-2^k-1 X<-matrix(0,L,5^k-1) column.index<-0 # for(cc in (1:L)){ # cycle over cells cvec<-binrep(cc,k) for(j in 0:(4^sum(cvec)-1)){ # cycle over error patterns column.index<-column.index+1 # function classify constructs the column # for cell cc, error pattern j X[,column.index]<-classify(j,cvec,k,type) } } X } #################################################################### make.X.list<-function(k){ # same as make.X.all except returns X in the form of a list L<-2^k-1 X.list<-as.list(1:L) for(cell in (1:L)){ cvec<-binrep(cell,k) Jc<-4^sum(cvec) XAB<-matrix(0,L,Jc) XA<-XAB XB<-XAB index<-0 for(j in 1:Jc){ index<-index+1 XAB[,index]<-classify(j-1,cvec,k,"AB") XA[,index]<-classify(j-1,cvec,k,"A") XB[,index]<-classify(j-1,cvec,k,"B") } X.list[[cell]]<-rbind(XAB,XA,XB) } X.list } ######################################################### binrep<-function(j, k) { # returns the binary representation of integer j as a k-vector of binary digits result <- rep(0, k) for(i in (k:1)) { result[i] <- j %% 2 j <- j %/% 2 } result } ######################################################### classify<-function(j,cvec,k,type){ # calculates column of X corresponding to error pattern j and cell c #(with binary representation cvec) of the original table, # matching on record A (type="A"), record B (type="B") or both records (type="AB") L<-2^k-1 x<-numeric(L) lists<-numeric(k) r<-sum(cvec) evens<-2*(1:r) odds<-evens-1 error.pattern<-as.logical(binrep(j,2*r)) error.vec<-if(type=="A") error.pattern[odds] else if(type=="B")error.pattern[evens] else error.pattern[odds]|error.pattern[evens] lists[cvec==1]<-1 lists[cvec==1]<-lists[cvec==1] + error.vec # lists has 0 if list not in cell cvec, # 1 if no error, 2 if error according to match definition decimal<-sum((lists==1)*(2^((k-1):0))) # lists==1 is binary representation of cell S0 # (set of lists with no errors, see text) # decimal its decimal equivalent x[decimal]<-1 # now do cells corresponding to the errors x[(2^((k-1):0))[lists==2]]<-1 x } ###################################################################### # # Extra functions required for Example 1, # same error probability for both tags make.log.p<-function(k, pi){ # constructs a list containing the components of the # the log of the p-vector and its derivative with respect to pi p <- 1/(1 + exp( - pi)) K <- 2^k - 1 log.p <- as.list(K) d.log.p <- as.list(K) for(j in (1:K)) { r <- sum(binmat(j, k)) m <- apply(binmat(0:(4^r - 1), 2 * r), 1, sum) log.p[[j]] <- m * pi - 2 * r * log(1 + exp(pi)) d.log.p[[j]] <- m - 2 * r * p } list(log.p, d.log.p) } ######################################################### solver<-function(y, k, X.list, modmat, theta, maxiter = 20, tol = 1e-08) { # Solves estimating equation # y: vector containing the 3 observed tables in standard order # X.list: output from makeX.list # modmat: model matrix from poisson regression # theta=(beta,pi): starting values del <- 2 * tol iter <- 1 oldS <- sum(abs(theta)) while(tol < sum(abs(del)) & iter <= maxiter) { stuff <- make.AD.rhs(k, X.list, modmat, theta) del <- solve(stuff$AD, stuff$rhs) theta <- theta + del print(sum(abs(del))) iter <- iter + 1 } list(theta = as.vector(theta), cov = solve(stuff$AD), iter = iter - 1) } ######################################################################## make.AD.rhs<-function(k, X.list, modmat, theta){ # calculates mu, D and V for the quasilikelihood fit # when both error probabilities are the same mu <- 0 V <- 0 D1 <- 0 D2 <- 0 r <- length(theta) pi <- theta[r] beta <- theta[ - r] stuff <- make.log.p(k, pi) log.p <- stuff[[1]] d.log.p <- stuff[[2]] lambda <- as.vector(exp(modmat %*% beta)) K <- 2^k - 1 for(j in (1:K)) { Xj <- X.list[[j]] pj <- exp(log.p[[j]]) muj <- as.vector(lambda[j] * Xj %*% pj) mu <- mu + muj V <- V + lambda[j] * Xj %*% (pj * t(Xj)) D1 <- D1 + outer(muj, modmat[j, ]) D2 <- D2 + lambda[j] * Xj %*% (pj * d.log.p[[j]]) } D <- cbind(D1, D2) A <- t(D) %*% moore.penrose(V) list(AD = A %*% D, rhs = A %*% (y - mu)) } ########################################################### get.var<-function(k, X.list, modmat, theta) { # calculates covariance matrix of thetahat, # given value of theta mu <- 0 V <- 0 D1 <- 0 D2 <- 0 r <- length(theta) piA <- theta[r] beta <- theta[1:(r - 1)] stuff <- make.log.p(k, piA) log.p <- stuff[[1]] d.log.p <- stuff[[2]] lambda <- as.vector(exp(modmat %*% beta)) K <- 2^k - 1 for(j in (1:K)) { pj <- exp(log.p[[j]]) Xj <- X.list[[j]] muj <- as.vector(lambda[j] * Xj %*% pj) mu <- mu + muj V <- V + lambda[j] * (Xj %*% (pj * t(Xj))) D1 <- D1 + outer(muj, modmat[j, ]) D2 <- D2 + lambda[j] * Xj %*% (pj * d.log.p[[j]]) } D <- cbind(D1, D2) A <- t(D) %*% moore.penrose(V) solve(A %*% D) }