# Code and data for Example 2 of "Effect of list errors on the estimation of population size" k<-3 Lvars<-scan() 0 0 1 13539 12732 13116 0 1 0 1093 874 1000 0 1 1 154 332 209 1 0 0 1073 819 991 1 0 1 134 357 193 1 1 0 53 149 71 1 1 1 18 51 22 X.list<-make.X.list(k) # prepare data junk<-matrix(Lvars,7,6,byrow=T) L1<-junk[,1] L2<-junk[,2] L3<-junk[,3] y.ab<-junk[,4] y.a<-junk[,5] y.b<-junk[,6] y3<-c(y.ab,y.a,y.b) # now start search for good starting values # the array result.dev contains the residual SS # for the regression to find an approximation to the true table ppA<-seq(-2,2,length=20) ppB<-seq(-2,2,length=20) result.dev<-matrix(0,20,20) for(i in (1:20)){ piA<-ppA[i] for(j in (1:20)){ piB<-ppB[j] stuff<-init.est(y3,X.list,k,piA,piB) result.dev[i,j]<-stuff$dev*all(stuff$y.est>0) }} # minimum value correspomds to piA= -0.7368421, piB= -0.1052632, # the 7,10 element of result.dev is the minimum piA<- ppA[7] piB<- ppB[10] # calculate the estimated true table y.est<-init.est(y3,X.list,k,piA,piB)$y.est # and the corresponding coefficients init.glm<-glm(y.est~L1+L2+L3+L1:L2+L2:L3,family=poisson) theta.start<-c(coef(init.glm),piA,piB) modmat<-model.matrix(init.glm) # now for the quasiliklihood fit quasifit<-solver.AB(y3, k, X.list, modmat, theta.start) [1] 1.870334 [1] 0.2871848 [1] 0.0100164 [1] 0.000133724 [1] 2.928094e-06 [1] 6.927299e-08 [1] 1.508225e-09 > quasifit $theta: [1] 8.4180854 -2.6921179 -2.5880667 0.9638985 0.9463045 -0.1065020 -0.7986801 [8] -0.1613698 $cov: (Intercept) L1 L2 L3 L1:L2 (Intercept) 0.031360369 -0.008489373 -0.022019873 -0.030550415 3.545371e-03 L1 -0.008489373 0.003347398 0.006144237 0.008206177 -2.682227e-03 L2 -0.022019873 0.006144237 0.024891438 0.021644014 -5.661547e-03 L3 -0.030550415 0.008206177 0.021644014 0.029850388 -3.393097e-03 L1:L2 0.003545371 -0.002682227 -0.005661547 -0.003393097 1.310768e-02 L2:L3 0.018293282 -0.005006838 -0.024774725 -0.018183815 4.200946e-03 -0.008107267 0.002110324 0.003114350 0.007826105 5.976251e-05 -0.005466024 0.001417884 0.002077625 0.005276232 7.851509e-05 L2:L3 (Intercept) 0.0182932818 -8.107267e-03 -5.466024e-03 L1 -0.0050068384 2.110324e-03 1.417884e-03 L2 -0.0247747252 3.114350e-03 2.077625e-03 L3 -0.0181838149 7.826105e-03 5.276232e-03 L1:L2 0.0042009460 5.976251e-05 7.851509e-05 L2:L3 0.0275856454 -1.174082e-03 -7.669081e-04 -0.0011740819 4.693547e-03 2.163727e-03 -0.0007669081 2.163727e-03 2.948188e-03 # Derived quantities # error probabilites 1/(1+exp(-quasifit$theta[7:8])) [1] 0.3103079 0.4597449 # estimate of N exp(quasifit$theta[1]) [1] 4528.225 # conf interval exp(quasifit$theta[1] +c(-1,1)*quasifit$cov[1,1]*1.96) [1] 4258.273 4815.291 bad.glm<-glm(y.ab~L1+L2+L3+L1:L2+L2:L3,family=poisson) > exp(bad.glm$coef[1]) (Intercept) 108413