# Code and data for Example 2 of
"Effect of list errors on the estimation of population size" 

k<-3
Lvars<-scan()
 0 0 1 	13539 12732 13116
 0 1 0 1093 874 1000
 0 1 1 154 332 209
 1 0 0 1073 819 991
 1 0 1 134 357 193
 1 1 0 53 149 71
 1 1 1 18 51 22
 

X.list<-make.X.list(k)

# prepare data

junk<-matrix(Lvars,7,6,byrow=T)
L1<-junk[,1]
L2<-junk[,2]
L3<-junk[,3]
y.ab<-junk[,4]
y.a<-junk[,5]
y.b<-junk[,6]
y3<-c(y.ab,y.a,y.b)


# now start search for good starting values
# the array result.dev contains the residual SS 
# for the regression to find an approximation to the true table

ppA<-seq(-2,2,length=20)
ppB<-seq(-2,2,length=20)
result.dev<-matrix(0,20,20)
for(i in (1:20)){
piA<-ppA[i]
for(j in (1:20)){
piB<-ppB[j]
stuff<-init.est(y3,X.list,k,piA,piB)
result.dev[i,j]<-stuff$dev*all(stuff$y.est>0)
}}

# minimum value correspomds to piA= -0.7368421, piB= -0.1052632, 
# the 7,10 element of result.dev is the minimum

piA<- ppA[7]
piB<- ppB[10]

# calculate the estimated true table
y.est<-init.est(y3,X.list,k,piA,piB)$y.est

# and the corresponding coefficients
init.glm<-glm(y.est~L1+L2+L3+L1:L2+L2:L3,family=poisson)
theta.start<-c(coef(init.glm),piA,piB)
modmat<-model.matrix(init.glm)

# now for the quasiliklihood fit
 

 quasifit<-solver.AB(y3, k, X.list, modmat, theta.start)
[1] 1.870334
[1] 0.2871848
[1] 0.0100164
[1] 0.000133724
[1] 2.928094e-06
[1] 6.927299e-08
[1] 1.508225e-09

> quasifit

$theta:
[1]  8.4180854 -2.6921179 -2.5880667  0.9638985  0.9463045 -0.1065020 -0.7986801
[8] -0.1613698

$cov:
             (Intercept)           L1           L2           L3         L1:L2 
(Intercept)  0.031360369 -0.008489373 -0.022019873 -0.030550415  3.545371e-03
         L1 -0.008489373  0.003347398  0.006144237  0.008206177 -2.682227e-03
         L2 -0.022019873  0.006144237  0.024891438  0.021644014 -5.661547e-03
         L3 -0.030550415  0.008206177  0.021644014  0.029850388 -3.393097e-03
      L1:L2  0.003545371 -0.002682227 -0.005661547 -0.003393097  1.310768e-02
      L2:L3  0.018293282 -0.005006838 -0.024774725 -0.018183815  4.200946e-03
            -0.008107267  0.002110324  0.003114350  0.007826105  5.976251e-05
            -0.005466024  0.001417884  0.002077625  0.005276232  7.851509e-05
                    L2:L3                             
(Intercept)  0.0182932818 -8.107267e-03 -5.466024e-03
         L1 -0.0050068384  2.110324e-03  1.417884e-03
         L2 -0.0247747252  3.114350e-03  2.077625e-03
         L3 -0.0181838149  7.826105e-03  5.276232e-03
      L1:L2  0.0042009460  5.976251e-05  7.851509e-05
      L2:L3  0.0275856454 -1.174082e-03 -7.669081e-04
            -0.0011740819  4.693547e-03  2.163727e-03
            -0.0007669081  2.163727e-03  2.948188e-03


#  Derived quantities

# error probabilites

1/(1+exp(-quasifit$theta[7:8]))
[1] 0.3103079 0.4597449

# estimate of N

exp(quasifit$theta[1])
[1] 4528.225

# conf interval
exp(quasifit$theta[1] +c(-1,1)*quasifit$cov[1,1]*1.96)
[1] 4258.273 4815.291

bad.glm<-glm(y.ab~L1+L2+L3+L1:L2+L2:L3,family=poisson)
> exp(bad.glm$coef[1])
 (Intercept) 
      108413