setdates <- function(projdat, E = 4, Ntimes = 10000, distname, dispersion=NULL, psuccess = 0.9, priority = NULL) { # setdates # Sets finish dates for the non-committed projects in the project portfolio so # that the overall probability of portfolio success is psuccess (if possible). # First of all, the committed deadlines are examined to find the failure # probability on these deadlines only: e.g. might have 5 projects with # the 3rd one committed, and 10% failure to meet the third deadline alone. # If this still leaves some allowed failure to divide between the # non-committed projects, (i.e. if psuccess < 0.9 in the example), then # p.firstfail is divided equally among the non-committed projects # so that the overall success probability is psuccess. # Note, however, that p.firstfail is divided equally only using those # trials that *succeeded* on all the committed deadlines (e.g. in the example, # only on the 90% of trials that succeeded on deadline 3). This means that, # when the SETDATES deadlines are used in PROJMAN, it will NOT (generally) # indicate equal pc.firstfail among the uncommitted projects, and will also # (generally) give different p.firstfail for the committed projects from # SETDATES. This happens because some of the firstfail that SETDATES # attributes to the committed projects will later be swept up by the uncommitted # projects, once they have fixed deadlines as required by PROJMAN. # # For SETDATES to run, it must be given a project priority (order of finish time). # Different priorities give different results. # The default (if the "priority" argument is left NULL) is to order the # projects so that all committed projects are to be finished first (in order of # their fixed deadlines), then all non-committed projects are to be finished in # increasing order of expected effort, e_i. # Alternatively, the user can specify a priority using the argument "priority". # This is a vector: for example, priority=c(3, 1, 2, 4), which indicates that # project 3 should be finished first, project 1 should be finished second, and # so on. An obvious choice would be to use priority=order(projdat$finish), # so that new deadlines are determined for the projects in "projdat" but the # order in which the projects are to be finished is the same as originally # specified in the "projdat" dataframe. # # The priority argument must be a permutation of the project numbers # 1,2,3,..,nproj. # # Project details are contained in the dataframe projdat, with fields # name (name) # needs (estimated (mean) effort required, in person-days) # finish (finish date: number of days from now, proposed or actual) # sofar (effort expended so far, in person-days) # commit (flag y/n indicating whether the finish date is a firm # commitment or not) # # Given E employees, the function simulates Ntimes sets of effort times from the # distributions specified. # The finish-on-time criterion is (cumsum(Y) <= E*finish). # # Y_i is the effort taken for project i (in person-days), and it is assumed to be # distributed according to distribution distname, which is one of # "norm", "pois", "chisq", "gamma", "lnorm", and "lnorm.med"; the mean/median of the # distribution is needs[i] and the dispersion is given by argument "dispersion", # which is either of length 1 (same dispersion for all projects) # or of length nproj (each project with different dispersion, although this # wouldn't be too common). # For distribution lnorm.med, needs[i] represents the MEDIAN; for all other # distributions, needs[i] represents the MEAN. # # For distname="pois" or "chisq", dispersion should be NULL. # For distname="norm", dispersion = variance = sigma^2. # For distname="gamma", dispersion = variance/(mean^2). # For distname="lnorm", dispersion = variance/(mean^2). # For distname="lnorm.med", dispersion = variance/(median^2) # = exp(sdlog^2)*(exp(sdlog^2)-1). # # # EXAMPLE: # find a set of finish dates for "portfolio" data set, with P(success)=0.90, # then test it in projman to check it's about right. # e.g. using chi-squared distribution: # dates <- setdates(portfolio,E=4,Ntimes=10000,distname="chisq",psuccess=0.90) # projman(portfolio,E=4,Ntimes=10000,distname="chisq",newfinish=dates) # # or using gamma distribution with dispersion 0.4: # dates <- setdates(portfolio, E=4, Ntimes=10000, # distname="gamma", dispersion=0.4, psuccess=0.90) # projman(portfolio, E=4, Ntimes=10000, distname="gamma", dispersion=0.4, # newfinish=dates) # # # first some housekeeping: nproj <- length(projdat$needs) if(!is.numeric(dispersion)) switch(distname, gamma = stop( "\ndispersion argument needed for\ndistname=gamma." ), norm = stop( "\ndispersion argument needed for distname=norm." ), lnorm = stop( "\ndispersion argument needed for distname=lnorm." ), lnorm.med = stop( "\ndispersion argument needed for distname=lnorm.med." ), NULL) if((!is.null(dispersion)) & (length(dispersion) != 1.) & (length( dispersion) != nproj)) stop("\ndispersion vector should be length 1 or nproj\n") if((!is.null(priority)) & (length(priority) != nproj)) stop("\npriority vector must be a permutation of 1, 2, ..., nproj\n" ) if((!is.null(priority))) if(sum((sort(priority) - (1:nproj))^2) > 0) stop("\npriority vector must be a permutation of 1, 2, ..., nproj\n" ) # # Define functions rdist.func, qdist.func, and pdist.func, which generate # random samples, quantiles, and probabilities from the distribution # distname given mean vector meanvec and dispersion vector dispvec: if(missing(distname)) stop( "distname argument required. \nUse distname=\"pois\",\n\"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\"." ) rdist.func <- function(ss, distname, meanvec, dispvec) { switch(distname, norm = rnorm(ss, mean = meanvec, sd = sqrt(dispvec)), pois = rpois(ss, lambda = meanvec), chisq = rchisq(ss, df = meanvec), gamma = rgamma(ss, shape = 1/dispvec, scale=dispvec * meanvec), lnorm = rlnorm(ss, meanlog = log(meanvec/sqrt(1 + dispvec)), sdlog = sqrt(log(1 + dispvec))), lnorm.med = rlnorm(ss, meanlog = log(meanvec), sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))), stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) } qdist.func <- function(p, distname, meanvec, dispvec) { switch(distname, norm = qnorm(p, mean = meanvec, sd = sqrt(dispvec)), pois = qpois(p, lambda = meanvec), chisq = qchisq(p, df = meanvec), gamma = qgamma(p, shape = 1/dispvec, scale = dispvec * meanvec), lnorm = qlnorm(p, meanlog = log(meanvec/sqrt(1 + dispvec)), sdlog = sqrt(log(1 + dispvec))), lnorm.med = qlnorm(p, meanlog = log(meanvec), sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))), stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) } pdist.func <- function(q, distname, meanvec, dispvec) { switch(distname, norm = pnorm(q, mean = meanvec, sd = sqrt(dispvec)), pois = ppois(q, lambda = meanvec), chisq = pchisq(q, df = meanvec), gamma = pgamma(q, shape = 1/dispvec, scale = dispvec * meanvec), lnorm = plnorm(q, meanlog = log(meanvec/sqrt(1 + dispvec)), sdlog = sqrt(log(1 + dispvec))), lnorm.med = plnorm(q, meanlog = log(meanvec), sdlog = sqrt(log((1 + sqrt(1 + 4 * dispvec))/2))), stop("invalid probability distribution specified. \nUse distname=\"pois\", \"norm\", \"chisq\", \"gamma\", \"lnorm\", or \"lnorm.med\".")) } # # housekeeping with committed and uncommitted projects. # note that "committed.orig" is the indices of the ORIGINAL # data frame that are committed, and doesn't apply after # the projects have been reordered. # The program exits if the committed deadlines are not # consistent with those specified in priority: i.e. # needs # order(projdat$finish[committed.orig])=order(ranks[committed.orig]) # where ranks[i]=3 if project i has the 3rd shortest deadline, etc; # That is, the project with the shortest committed deadline appears # as the first rank among the committed projects, and so on. committed.orig <- (1.:nproj)[projdat$commit == "y"] ncommitted <- length(committed.orig) if(ncommitted > 0) mutable.orig <- (1:nproj)[ - committed.orig] else mutable.orig <- (1:nproj) nmutable <- length(mutable.orig) if(!is.null(priority)){ ranks <- order(priority) if(sum((order(projdat$finish[committed.orig]) - order(ranks[ committed.orig]))^2) > 0) stop("\nspecified priority vector is inconsistent with known commitments.\n" ) } # # Now put the projects into deadline order. # If the "priority" vector is not specified, the default ordering # is to order the committed projects according to their finish date # and the uncommitted (mutable) projects according to their expected # effort. # Otherwise, the vector ord is ord=priority. if(is.null(priority)) { com.order <- committed.orig[order(projdat$finish[committed.orig ])] mutable.order <- mutable.orig[order(projdat$needs[mutable.orig] )] ord <- c(com.order, mutable.order) } else ord <- priority # # Apply the new ordering to all data fields: # the new vector "committed" refers to the ordered projects # (unlike committed.orig). name <- projdat$name[ord] needs <- projdat$needs[ord] sofar <- projdat$sofar[ord] commit <- projdat$commit[ord] finish <- projdat$finish[ord] committed <- (1:nproj)[commit == "y"] mutable <- (1:nproj)[commit == "n"] # if(length(dispersion) > 1.) dispersion <- dispersion[ord] cat("\nOrdered projects: \n") # Ensure that the dispersion vector has length nproj: # print(as.character(name), quote=F) if(length(dispersion) == 1.) dispersion <- rep(dispersion, nproj) # First generate times from unconditional probability distributions for the # projects not yet started: ymat <- matrix(ncol = Ntimes, nrow = nproj) nunstarted <- length(sofar[sofar == 0]) # Now generate times from the conditional probability distribution for the # projects already started (those with sofar>0): # need to subtract off the amount done so far (sofar) before inserting into # ymat. if(nunstarted > 0) ymat[sofar == 0, ] <- rdist.func(Ntimes * nunstarted, distname, meanvec = needs[sofar == 0], dispvec = dispersion[sofar == 0]) nstarted <- length(sofar[sofar > 0]) # if(nstarted > 0) { unif <- runif(Ntimes * nstarted) pdist.t <- pdist.func(sofar[sofar > 0], distname, meanvec = needs[sofar > 0], dispvec = dispersion[sofar > 0]) ymat[sofar > 0, ] <- qdist.func((1 - pdist.t) * unif + pdist.t, distname, meanvec = needs[sofar > 0], dispvec = dispersion[sofar > 0]) - sofar[sofar > 0] } # Now find p.firstfail for the committed projects. # first.fail is a vector storing, for each of the Ntimes trials, the # first of the committed projects to fail. If this is NA then all # committed projects succeeded. cumsum.ymat <- apply(ymat, 2., cumsum) if(ncommitted == 1) { # want first.fail to be NA if the committed project # succeeded, or 1 otherwise first.fail <- rep(NA, Ntimes) first.fail[cumsum.ymat[committed, ] > E * finish[committed]] <- 1 # "1" means that first (and only) committed project failed p.firstfail <- length(na.omit(first.fail))/Ntimes cat("p.firstfail: ", p.firstfail, "\n") } else if(ncommitted > 1.) { first.fail <- apply(cumsum.ymat[committed, ], 2., function(x) { # (e.g. returns 2 if the committed project # with 2nd shortest deadline was the first to fail) # min(c(nproj+10, (1:ncommitted)[x > E * finish[committed]])) } ) first.fail[first.fail > nproj] <- NA # note the treatment of min to deal with differences between Splus # and R. # overall.failures <- na.omit(first.fail) p.firstfail <- tabulate(overall.failures, nbins = ncommitted)/ Ntimes cat("p.firstfail: ", p.firstfail, "\n") } else { first.fail <- rep(NA, Ntimes) p.firstfail <- 0 } fail.ok <- (1 - psuccess - sum(p.firstfail))/nmutable if(fail.ok < 0) cat("\n\nSorry: specified failure rate is already exceeded by commitments. \nBest success rate is ", format(100 * (1 - sum(p.firstfail))), "%\n\n") # # Now use cumsum.ymat to find the suggested finish dates for each mutable # project: fail.ok is the allowed probability of first-fail for each one. # Do the projects one at a time, store the results in finish.res. # Note that the fail.ok quantile must be divided by E for the number of # days necessary. # The vector rep.in contains the indices of cumsum.ymat that are "in" # at each stage: only those replicates who have "passed" all of projects # 1 to i-1 go through to project i. # fail.ok <- max(c(fail.ok, 0)) finish.res <- numeric(nproj) if(ncommitted > 0) finish.res[committed] <- finish[committed] # point.ok is the number of the longest-taking projects that will be discarded # to find the finish time at which fail.ok projects fail. point.ok <- round(Ntimes * fail.ok) if(nmutable > 0.) { # # the uncommitted projects are indexed by "mutable": # so the i'th project is mutable[i]. cat("\n\n fail.ok (%): ", format(100. * fail.ok), "\n\n") rep.in <- (1:Ntimes)[is.na(first.fail)] # rep.in are those of the Ntimes simulations that # succeeded on all the committed projects. for(i in 1:nmutable) { proj.i <- mutable[i] n.in <- length(rep.in) # (this changes as i increments) cumsum.ind.i <- rep.in[order(cumsum.ymat[proj.i, rep.in ])] finish.res[proj.i] <- cumsum.ymat[proj.i, cumsum.ind.i[ n.in - point.ok]]/E rep.in <- cumsum.ind.i[1:(n.in - point.ok)] } } # # get finish.res back into order of original dataset to return: names(finish.res) <- levels(name)[name] finish.res[order(ord)] }