projman <- function(projdat, E, Ntimes = 10000, distname, dispersion=NULL,
newfinish)
{
# projman: project management function.
# Project details are contained in the dataframe projdat, with fields
# 	name     (name)
# 	needs    (estimated (mean) effort required, in person-days)
#	finish   (finish date: number of days from now, proposed or actual)
#	sofar    (effort expended so far, in person-days)
#	commit   (flag y/n indicating whether the finish date is a firm
# 			commitment or not)
# 
# Given E employees, simulates Ntimes sets of variables from this set and 
# checks to see if everything was finished on time (cumsum(Y)<=E*finish).
# Returns the empirical probability that everything was finished on time, 
# together with a breakdown of the "problem dates".   
# 
# Y_i is the effort taken for project i (in person-days), and it is assumed to 
# be distributed according to distribution distname, which is one of
# "norm", "pois", "chisq", "gamma", "lnorm", "lnorm.med"; the mean/median of the 
# distribution is needs[i] and the dispersion is given by argument "dispersion", 
# which is either of length 1 (same dispersion for all projects)
# or of length nproj (each project with different dispersion, although this
# wouldn't be too common).  
# In the case of lnorm.med, needs[i] is the MEDIAN of the distribution. In all
# other cases, needs[i] is the MEAN.  
# For distname="pois" or "chisq", dispersion should be NULL. 
# For distname="norm", dispersion = variance = sigma^2.
# For distname="gamma", dispersion = variance/(mean^2).
# For distname="lnorm", dispersion = variance/(mean^2).
# For distname="lnorm.med", deal with the MEDIANS of the distribution;
#               dispersion = variance/(median^2) = exp(sdlog^2)*(exp(sdlog^2)-1).
#
# If argument newfinish is present, it overrides the finish field of 
# data frame projdat. 
#
# EXAMPLE: 
# to find P(success) for the deadlines suggested in file "portfolio",
# where portfolio is initially stored in the working directory,
# and effort is assumed to be Gamma with common dispersion 0.4:
# portfolio <- read.table("portfolio", header=T)
# projman(portfolio, E=4, Ntimes=10000, distname="gamma", dispersion=0.4)    
#
# first some housekeeping:
	nproj <- length(projdat$needs)
	if(!is.numeric(dispersion))
		switch(distname,
			gamma = stop(
		        "dispersion argument needed for distname=gamma."
				),
			norm = stop(
			"dispersion argument needed for distname=norm."
				),
			lnorm = stop(
			"dispersion argument needed for distname=lnorm."
				),
			lnorm.med = stop(
			"dispersion argument needed for distname=lnorm.med."
				),
			NULL)
	if((!is.null(dispersion)) & (length(dispersion) != 1) & (length(dispersion) != nproj))
		stop("dispersion vector should be length 1 or nproj\n")
	if(missing(distname)) stop("distname argument required. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")	
#	 
# Define functions rdist.func, qdist.func, and pdist.func, which generate
# random samples, quantiles, and probabilities from the distribution 
# distname given mean vector meanvec and dispersion vector dispvec:
	rdist.func <- function(ss, distname, meanvec, dispvec)
	{
		switch(distname,
			norm = rnorm(ss, mean = meanvec, sd = sqrt(dispvec)),
			pois = rpois(ss, lambda = meanvec),
			chisq = rchisq(ss, df = meanvec),
			gamma = rgamma(ss, shape = 1/dispvec, scale = 
				dispvec * meanvec),
			lnorm = rlnorm(ss, meanlog = log(meanvec/sqrt(1 + 
				dispvec)), sdlog = sqrt(log(1 + dispvec))),
 			lnorm.med = rlnorm(ss, meanlog = log(meanvec),
 				sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))),
			stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\"."))
	}
	qdist.func <- function(p, distname, meanvec, dispvec)
	{
		switch(distname,
			norm = qnorm(p, mean = meanvec, sd = sqrt(dispvec)),
			pois = qpois(p, lambda = meanvec),
			chisq = qchisq(p, df = meanvec),
			gamma = qgamma(p, shape = 1/dispvec, scale = 
				dispvec * meanvec),
			lnorm = qlnorm(p, meanlog = log(meanvec/sqrt(1 + 
				dispvec)), sdlog = sqrt(log(1 + dispvec))),
 			lnorm.med = qlnorm(p, meanlog = log(meanvec),
 				sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))),
			stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\"."))
	}
	pdist.func <- function(q, distname, meanvec, dispvec)
	{
		switch(distname,
			norm = pnorm(q, mean = meanvec, sd = sqrt(dispvec)),
			pois = ppois(q, lambda = meanvec),
			chisq = pchisq(q, df = meanvec),
			gamma = pgamma(q, shape = 1/dispvec, scale = 
				dispvec * meanvec),
			lnorm = plnorm(q, meanlog = log(meanvec/sqrt(1 + 
				dispvec)), sdlog = sqrt(log(1 + dispvec))),
 			lnorm.med = plnorm(q, meanlog = log(meanvec),
 				sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))),
			stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\"."))
	}
# Order the projects according to their finish date:
	if(missing(newfinish))
		newfinish <- projdat$finish
	ord <- order(newfinish)
	name <- projdat$name[ord]
	needs <- projdat$needs[ord]
	sofar <- projdat$sofar[ord]
	commit <- projdat$commit[ord]
	finish <- sort(newfinish)
	if(length(dispersion) > 1)
		dispersion <- dispersion[ord]
	cat("\nProjects in order from earliest deadline to latest: \n")
	print(as.character(name), quote=F)
	cat("\n\nStarting Monte Carlo simulations... please wait:\n")	#
# Ensure that the dispersion vector has length nproj:
	if(length(dispersion) == 1)
		dispersion <- rep(dispersion, nproj)
	ymat <- matrix(ncol = Ntimes, nrow = nproj)	#
# First generate times from unconditional probability distributions for the 
# projects not yet started:
	nunstarted <- length(sofar[sofar == 0])
	if(nunstarted > 0) ymat[sofar == 0,  ] <- rdist.func(Ntimes * 
			nunstarted, distname, meanvec = needs[sofar == 0], 
			dispvec = dispersion[sofar == 0])	#
# Now generate times from the conditional probability distribution for the
# projects already started (those with sofar>0):
# need to subtract off the amount done so far (sofar) before inserting into
# ymat.
	nstarted <- length(sofar[sofar > 0])
	if(nstarted > 0) {
		unif <- runif(Ntimes * nstarted)
		pdist.t <- pdist.func(sofar[sofar > 0], distname, meanvec = 
			needs[sofar > 0], dispvec = dispersion[sofar > 0])
		ymat[sofar > 0,  ] <- qdist.func((1 - pdist.t) * unif + pdist.t,
			distname, meanvec = needs[sofar > 0], dispvec = 
			dispersion[sofar > 0]) - sofar[sofar > 0]
	}
#
	cumsum.ymat <- apply(ymat, 2, cumsum)	
	results.mat <- (apply(cumsum.ymat, 2, function(x)
	{
		as.numeric(x <= E * finish)
	}
	))	# 
# first.fail is a vector storing, for each of the Ntimes trials, the
# first project to fail (get a 0 in results.mat).  If this is NA then
# all projects succeeded.   overall.nfail and overall.nsuccess are the
# proportions of the Ntimes trials to fail and succeed respectively
# (success means ALL nproj projects succeeded). 
	first.fail <- apply(results.mat, 2, function(x)
	{
		min(c(nproj+10, (1:nproj)[x == 0]))
	}
	)	
	first.fail[first.fail>nproj] <- NA
# note the treatment of min to deal with differences between Splus and R.
#
	overall.failures <- na.omit(first.fail)
	overall.nfail <- length(overall.failures)/Ntimes
	overall.nsuccess <- 1 - overall.nfail	#
# next line calculates for each project the percentage of times it was the
# FIRST project to fail: pc.firstfail
	pc.firstfail <- tabulate(overall.failures, nbins = nproj)/Ntimes * 100	
	#
# then pc.fail is for each project the percentage of times it failed:
	pc.fail <- apply(results.mat, 1, function(x)
	{
		length(x[x == 0])
	}
	)/Ntimes * 100	#
# Finally use the cumsum.ymat results to find the suggested finish dates
# for each project: choose the 95% point so that 5% of projects fail, 
# and divide this by E for the number of days necessary.
	point.95 <-  round(Ntimes * 0.95)
	point.99 <-  round(Ntimes * 0.99)
	finish.95.99 <- apply(cumsum.ymat, 1, function(x)
	{
		sort(x)[c(point.95, point.99)]
	}
	)/E
	print(data.frame(name = name, pc.fail = pc.fail, pc.firstfail = 
		pc.firstfail, finish = finish, finish.95 = round(finish.95.99[1,
		]), finish.99 = round(finish.95.99[2,  ]), row.names = NULL))
	cat("\n\n")
	c(pc.success = overall.nsuccess * 100)
}