projman <- function(projdat, E, Ntimes = 10000, distname, dispersion=NULL, newfinish) { # projman: project management function. # Project details are contained in the dataframe projdat, with fields # name (name) # needs (estimated (mean) effort required, in person-days) # finish (finish date: number of days from now, proposed or actual) # sofar (effort expended so far, in person-days) # commit (flag y/n indicating whether the finish date is a firm # commitment or not) # # Given E employees, simulates Ntimes sets of variables from this set and # checks to see if everything was finished on time (cumsum(Y)<=E*finish). # Returns the empirical probability that everything was finished on time, # together with a breakdown of the "problem dates". # # Y_i is the effort taken for project i (in person-days), and it is assumed to # be distributed according to distribution distname, which is one of # "norm", "pois", "chisq", "gamma", "lnorm", "lnorm.med"; the mean/median of the # distribution is needs[i] and the dispersion is given by argument "dispersion", # which is either of length 1 (same dispersion for all projects) # or of length nproj (each project with different dispersion, although this # wouldn't be too common). # In the case of lnorm.med, needs[i] is the MEDIAN of the distribution. In all # other cases, needs[i] is the MEAN. # For distname="pois" or "chisq", dispersion should be NULL. # For distname="norm", dispersion = variance = sigma^2. # For distname="gamma", dispersion = variance/(mean^2). # For distname="lnorm", dispersion = variance/(mean^2). # For distname="lnorm.med", deal with the MEDIANS of the distribution; # dispersion = variance/(median^2) = exp(sdlog^2)*(exp(sdlog^2)-1). # # If argument newfinish is present, it overrides the finish field of # data frame projdat. # # EXAMPLE: # to find P(success) for the deadlines suggested in file "portfolio", # where portfolio is initially stored in the working directory, # and effort is assumed to be Gamma with common dispersion 0.4: # portfolio <- read.table("portfolio", header=T) # projman(portfolio, E=4, Ntimes=10000, distname="gamma", dispersion=0.4) # # first some housekeeping: nproj <- length(projdat\$needs) if(!is.numeric(dispersion)) switch(distname, gamma = stop( "dispersion argument needed for distname=gamma." ), norm = stop( "dispersion argument needed for distname=norm." ), lnorm = stop( "dispersion argument needed for distname=lnorm." ), lnorm.med = stop( "dispersion argument needed for distname=lnorm.med." ), NULL) if((!is.null(dispersion)) & (length(dispersion) != 1) & (length(dispersion) != nproj)) stop("dispersion vector should be length 1 or nproj\n") if(missing(distname)) stop("distname argument required. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".") # # Define functions rdist.func, qdist.func, and pdist.func, which generate # random samples, quantiles, and probabilities from the distribution # distname given mean vector meanvec and dispersion vector dispvec: rdist.func <- function(ss, distname, meanvec, dispvec) { switch(distname, norm = rnorm(ss, mean = meanvec, sd = sqrt(dispvec)), pois = rpois(ss, lambda = meanvec), chisq = rchisq(ss, df = meanvec), gamma = rgamma(ss, shape = 1/dispvec, scale = dispvec * meanvec), lnorm = rlnorm(ss, meanlog = log(meanvec/sqrt(1 + dispvec)), sdlog = sqrt(log(1 + dispvec))), lnorm.med = rlnorm(ss, meanlog = log(meanvec), sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))), stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) } qdist.func <- function(p, distname, meanvec, dispvec) { switch(distname, norm = qnorm(p, mean = meanvec, sd = sqrt(dispvec)), pois = qpois(p, lambda = meanvec), chisq = qchisq(p, df = meanvec), gamma = qgamma(p, shape = 1/dispvec, scale = dispvec * meanvec), lnorm = qlnorm(p, meanlog = log(meanvec/sqrt(1 + dispvec)), sdlog = sqrt(log(1 + dispvec))), lnorm.med = qlnorm(p, meanlog = log(meanvec), sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))), stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) } pdist.func <- function(q, distname, meanvec, dispvec) { switch(distname, norm = pnorm(q, mean = meanvec, sd = sqrt(dispvec)), pois = ppois(q, lambda = meanvec), chisq = pchisq(q, df = meanvec), gamma = pgamma(q, shape = 1/dispvec, scale = dispvec * meanvec), lnorm = plnorm(q, meanlog = log(meanvec/sqrt(1 + dispvec)), sdlog = sqrt(log(1 + dispvec))), lnorm.med = plnorm(q, meanlog = log(meanvec), sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))), stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) } # Order the projects according to their finish date: if(missing(newfinish)) newfinish <- projdat\$finish ord <- order(newfinish) name <- projdat\$name[ord] needs <- projdat\$needs[ord] sofar <- projdat\$sofar[ord] commit <- projdat\$commit[ord] finish <- sort(newfinish) if(length(dispersion) > 1) dispersion <- dispersion[ord] cat("\nProjects in order from earliest deadline to latest: \n") print(as.character(name), quote=F) cat("\n\nStarting Monte Carlo simulations... please wait:\n") # # Ensure that the dispersion vector has length nproj: if(length(dispersion) == 1) dispersion <- rep(dispersion, nproj) ymat <- matrix(ncol = Ntimes, nrow = nproj) # # First generate times from unconditional probability distributions for the # projects not yet started: nunstarted <- length(sofar[sofar == 0]) if(nunstarted > 0) ymat[sofar == 0, ] <- rdist.func(Ntimes * nunstarted, distname, meanvec = needs[sofar == 0], dispvec = dispersion[sofar == 0]) # # Now generate times from the conditional probability distribution for the # projects already started (those with sofar>0): # need to subtract off the amount done so far (sofar) before inserting into # ymat. nstarted <- length(sofar[sofar > 0]) if(nstarted > 0) { unif <- runif(Ntimes * nstarted) pdist.t <- pdist.func(sofar[sofar > 0], distname, meanvec = needs[sofar > 0], dispvec = dispersion[sofar > 0]) ymat[sofar > 0, ] <- qdist.func((1 - pdist.t) * unif + pdist.t, distname, meanvec = needs[sofar > 0], dispvec = dispersion[sofar > 0]) - sofar[sofar > 0] } # cumsum.ymat <- apply(ymat, 2, cumsum) results.mat <- (apply(cumsum.ymat, 2, function(x) { as.numeric(x <= E * finish) } )) # # first.fail is a vector storing, for each of the Ntimes trials, the # first project to fail (get a 0 in results.mat). If this is NA then # all projects succeeded. overall.nfail and overall.nsuccess are the # proportions of the Ntimes trials to fail and succeed respectively # (success means ALL nproj projects succeeded). first.fail <- apply(results.mat, 2, function(x) { min(c(nproj+10, (1:nproj)[x == 0])) } ) first.fail[first.fail>nproj] <- NA # note the treatment of min to deal with differences between Splus and R. # overall.failures <- na.omit(first.fail) overall.nfail <- length(overall.failures)/Ntimes overall.nsuccess <- 1 - overall.nfail # # next line calculates for each project the percentage of times it was the # FIRST project to fail: pc.firstfail pc.firstfail <- tabulate(overall.failures, nbins = nproj)/Ntimes * 100 # # then pc.fail is for each project the percentage of times it failed: pc.fail <- apply(results.mat, 1, function(x) { length(x[x == 0]) } )/Ntimes * 100 # # Finally use the cumsum.ymat results to find the suggested finish dates # for each project: choose the 95% point so that 5% of projects fail, # and divide this by E for the number of days necessary. point.95 <- round(Ntimes * 0.95) point.99 <- round(Ntimes * 0.99) finish.95.99 <- apply(cumsum.ymat, 1, function(x) { sort(x)[c(point.95, point.99)] } )/E print(data.frame(name = name, pc.fail = pc.fail, pc.firstfail = pc.firstfail, finish = finish, finish.95 = round(finish.95.99[1, ]), finish.99 = round(finish.95.99[2, ]), row.names = NULL)) cat("\n\n") c(pc.success = overall.nsuccess * 100) }