est.dispersion <- function(obs, est, distname)
{
# est.dispersion
# Estimates the dispersion for a set of past data where actual and 
# estimated efforts are known.  
# "obs" is a vector of observed efforts taken from past data.
# "est" is the corresponding vector of estimated efforts:
# these represent the MEDIANS of the distributions for distname=lnorm.med,
# and the MEANS of the distributions for all other distnames.
# 
# This function is most appropriate when the estimated efforts were 
# obtained by guesswork before the projects started.  If they were
# derived as the output of a regression model, then dispersion would
# normally be estimated as part of the model, rather than estimating
# it separately using this function. 
#
# "distname" is one of "norm", "gamma", "lnorm", or "lnorm.med"; it must be supplied 
# as a character string.
# For distname="norm", dispersion = variance = sigma^2.
# For distname="gamma" and "lnorm", dispersion = variance/(mean^2).
# For distname="lnorm.med", dispersion = variance/(median^2).
# 
#
# EXAMPLE: 
# a set of past projects might be stored in data frame "pastdata"
# with columns "observed" and "estimated": then to estimate the
# dispersion for the Gamma distribution:
# est.dispersion(obs=pastdata$observed, est=pastdata$estimated, "gamma")
#
        ddist.func <- function(x, distname, meanvec, dispersion)
        {
                switch(distname,
                        norm = dnorm(x, mean = meanvec, sd = sqrt(dispersion)),
                        gamma = dgamma(x, shape = 1/dispersion, scale =
                                dispersion * meanvec),
                        lnorm = dlnorm(x, meanlog = log(meanvec/sqrt(1 +
                                dispersion)), sdlog = sqrt(log(1 + dispersion))),
			lnorm.med = dlnorm(x, meanlog = log(meanvec),
                                sdlog = sqrt(log((1 + sqrt(1 + 4 * dispersion))/2))),
			stop("invalid probability distribution specified. \nUse distname= \"norm\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) 
        }
	meanvec <- est
#
# (called meanvec although it will be medians for lnorm.med distribution).
# nllike.func is the negative log-likelihood when mean(or median)=est, 
# observations=obs, and dispersion is known:
	nllike.func <- function(disp){		
		# add a line to bypass dgamma if distname=gamma,
		# because R on Windows will crash if the dispersion
		# hits a negative value during the optimization
		if((distname == "gamma")&(disp <= 0)) nllike <- NaN
		else nllike <- sum(-log(ddist.func(obs, distname, meanvec, disp)))
		nllike
	}
#
# choose a starting value for dispersion for the minimization:
# for the Normal distribution this is just the variance;
# for the Gamma and Lognormal distributions it is based on the idea that
# the maximum difference between estimate and observed might be about 2 sd's,
# so dispersion estimate would be sd^2/mean^2 and this might therefore be 
# about 1/4 * max((estimate-observed)^2/estimate^2);
# this value is also used as a crude guess for the Lognormal-median distribution.
	start.disp <- switch(distname,
			norm = mean((obs-meanvec)^2),
			gamma = max((1-obs/meanvec)^2)/4,
			lnorm = max((1-obs/meanvec)^2)/4,
			lnorm.med = max((1-obs/meanvec)^2)/4,
			stop("invalid probability distribution specified. \nUse distname= \"norm\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) 
# switch off warning messages just for the minimization:
	options(warn=-1)
	result <- nlm(nllike.func, start.disp, iterlim=1000)
	cat("\n\nStart value for minimization: ", start.disp, "\n\n")
	cat("Minimization result:\n")
	print(result)
# switch warning messages back on:
	options(warn=0)
	disp.est <- c(disp.est=result$estimate)
	cat("Estimated dispersion: ", disp.est,"\n")
	disp.est
			
}