# smartpred code for Version 6.0 Release 1 for Sun SPARC, SunOS 5.6 : 2000 # Changes and new functions supplied here are # (C) 2000 T. W. Yee and T. J. Hastie # This file may be used only with a licenced copy of # Version 6.x of SPLUS for some version of UNIX # See DISCLAIMER for disclaimer and copyright notice. # Version 0.72 # 16 January 2002 # (C) T. W. Yee and T. J. Hastie # Functions/expressions in this file are: # lm # predict.lm # predict.mlm # glm # survreg # survReg # gls # predict.gls # New functions ========================================================= # smart.mode.is # setup.smart # wrapup.smart (not needed) # get.smart.prediction # put.smart # get.smart # is.smart # # Smart functions below here ========================================== # bs # ns # predict.bs # predict.ns # poly.raw # scale # # Home baked smart functions below here =============================== # my1 (example) # my2 (example) # stdze1 (example) # stdze2 (example) lm <- function(formula, data, weights, subset, na.action, method = "qr", model = F, x = F, y = F, contrasts = NULL, smart=T, ...) { call <- match.call() m <- match.call(expand = F) if(smart) setup.smart("write") # This is new m$method <- m$model <- m$x <- m$y <- m$contrasts <- m$smart <- m$... <- NULL m$drop.unused.levels <- T m[[1]] <- as.name("model.frame") m <- eval(m, sys.parent()) if(method == "model.frame") return(m) Terms <- attr(m, "terms") xvars <- as.character(attr(Terms, "variables")) if((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[ - yvar] if(length(xvars) > 0) { xlevels <- lapply(m[xvars], levels) xlevels <- xlevels[!sapply(xlevels, is.null)] if(length(xlevels) == 0) xlevels <- NULL } else xlevels <- NULL weights <- model.extract(m, weights) Y <- model.extract(m, response) X <- model.matrix(Terms, m, contrasts) na.message <- attr(m, "na.message") saved.na.action <- attr(m, "na.action") if(!model) unset(m) fit <- if(length(weights)) lm.wfit(if(x) X else unset(X), Y, weights, method, ...) else lm.fit(if(x) X else unset(X), Y, method, ...) fit$terms <- Terms fit$call <- call if(model) fit$model <- m if(x) fit$x <- X if(y) fit$y <- Y if(!is.null(xlevels)) attr(fit, "xlevels") <- xlevels attr(fit, "na.message") <- na.message fit$na.action <- saved.na.action if(smart) { fit$smart.prediction <- get.smart.prediction() # This is new } fit } attr(lm, "smart") <- TRUE predict.lm <- function(object, newdata, type = c("response", "terms"), se.fit = F, terms = labels(object), ci.fit = F, pi.fit = F, conf.level = 0.95, conf.type = "p", ...) { type <- match.arg(type) fit.only <- !(se.fit || ci.fit || pi.fit) if(missing(newdata) && type != "terms" && fit.only) return(fitted(object)) Terms <- object$terms if(!inherits(Terms, "terms")) stop("invalid terms component of object") offset <- attr(Terms, "offset") intercept <- attr(Terms, "intercept") xbar <- NULL if(missing(newdata)) { x <- model.matrix(object) #center x if terms are to be computed if(type == "terms" && intercept) { xbar <- apply(x, 2., mean) x <- sweep(x, 2., xbar) } } else if(!((is.atomic(newdata) && length(newdata) == 1. && length(object$coef) != 1. && newdata > 0. && (newdata - trunc(newdata) < .Machine$single.eps)) | is.list(newdata))) { if(!is.null(offset)) { warning("Offset not included") #try and coerce newdata to look like the x matrix offset <- NULL } TT <- length(object$coef) if(is.matrix(newdata) && ncol(newdata) == TT) x <- newdata else if(length(newdata) == TT) x <- matrix(newdata, 1., TT) else stop("Argument \"newdata\" is not a data frame, and cannot be coerced to an appropriate model matrix" ) } else { #newdata is a list, data frame or frame number if(length(object$smart.prediction)) { # Smart prediction: handle the prediction flaw setup.smart("read", smart.prediction=object$smart.prediction) } x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts, xlevels = attr(object, "xlevels")) if(length(object$smart.prediction)) { wrapup.smart() # not needed actually } if(!is.null(offset)) offset <- eval(attr(Terms, "variables")[offset], newdata) } if(!missing(newdata) && type == "terms" && intercept) { xold <- model.matrix(object) #need to center x xbar <- apply(xold, 2., mean) x <- sweep(x, 2., xbar) } coefs <- coef(object) asgn <- attr(coefs, "assign") if(type == "terms") { if(ci.fit || pi.fit) { warning("Confidence and prediction intervals not available for type='terms'" ) ci.fit <- F pi.fit <- F } terms <- match.arg(terms, labels(object)) asgn <- asgn[terms] } nac <- is.na(object$coef) if(any(nac)) { xbar <- xbar[!nac] x <- x[, !nac] } attr(x, "constant") <- xbar if(!fit.only) { fit.summary <- summary.lm(object) pred <- Build.terms(x, coefs, fit.summary$cov * fit.summary$ sigma^2., asgn, collapse = type != "terms") pred$residual.scale <- fit.summary$sigma pred$df <- object$df.resid } else pred <- Build.terms(x, coefs, NULL, assign = asgn, collapse = type != "terms") if(!is.null(offset) && type != "terms") { if(missing(newdata)) warning("Offset not included") else { if(!fit.only) pred$fit <- pred$fit + offset else pred <- pred + offset } } if(ci.fit || pi.fit) { if(conf.level > 1 && conf.level < 100) conf.level <- conf.level/100 crit.value <- switch(substring(conf.type, 1, 1), p = qt(1 - (1 - conf.level)/2, object$df.residual), s = sqrt(object$rank * qf(conf.level, object$ rank, object$df.residual)), stop("conf.type not recognized")) if(ci.fit) pred$ci.fit <- cbind(lower = pred$fit - crit.value * pred$se.fit, upper = pred$fit + crit.value * pred$se.fit) if(pi.fit) { pi.se <- sqrt(pred$residual.scale^2 + pred$se.fit^ 2) pred$pi.fit <- cbind(lower = pred$fit - pi.se * crit.value, upper = pred$fit + pi.se * crit.value) } } if(missing(newdata) && !is.null(object$na.action)) { if(fit.only) pred <- napredict(object$na.action, pred) else { pred$fit <- napredict(object$na.action, pred$fit) pred$se.fit <- napredict(object$na.action, pred$se.fit) if(ci.fit) pred$ci.fit <- napredict(object$na.action, pred$ci.fit) if(pi.fit) pred$pi.fit <- napredict(object$na.action, pred$pi.fit) } } # Add level attribute here as napredict() may strip it if(ci.fit) attr(pred$ci.fit, "conf.level") <- conf.level if(pi.fit) attr(pred$pi.fit, "conf.level") <- conf.level if((ci.fit || pi.fit) && (!se.fit)) { pred$se.fit <- NULL pred$residual.scale <- NULL pred$df <- NULL } pred } attr(predict.lm, "smart") <- TRUE predict.mlm <- function(fit, newdata, type, se.fit = F) { if(missing(newdata)) return(fit$fitted) if(se.fit | !missing(type)) stop("The \"type\" and \"se.fit\" arguments are not currently implemented for MLM objects; use \"predict.lm\" on a suitably subsetted model") if(length(object$smart.prediction)) { # Smart prediction: handle the prediction flaw setup.smart("read", smart.prediction=object$smart.prediction) } tt <- fit$terms if(!inherits(tt, "terms")) stop("invalid terms component of fit") vv <- attr(tt, "variables") attr(tt, "variables") <- vv[ - attr(tt, "response")] x <- model.matrix(tt, newdata, fit$contrasts, xlevels = attr(fit, "xlevels")) if(length(object$smart.prediction)) { wrapup.smart() } c <- coef(fit) asgn <- fit$assign pred <- x[, unlist(asgn)] %*% c if(inherits(fit, "mlm")) pred else pred[, 1.] } attr(predict.mlm, "smart") <- TRUE glm <- function(formula = formula(data), family = gaussian, data = sys.parent(), weights, subset, na.action, start = eta, control = glm.control(...), method = "glm.fit", model = F, x = F, y = T, contrasts = NULL, smart=T, ...) { call <- match.call() m <- match.call(expand = F) if(smart) setup.smart("write") # This is new m$family <- m$method <- m$model <- m$x <- m$y <- m$control <- m$ contrasts <- m$smart <- m$... <- NULL m$drop.unused.levels <- T m[[1]] <- as.name("model.frame") m <- eval(m, sys.parent()) Terms <- attr(m, "terms") if(method == "model.frame") return(m) xvars <- as.character(attr(Terms, "variables")) if(length(xvars) > 0) { xlevels <- lapply(m[xvars], levels) xlevels <- xlevels[!sapply(xlevels, is.null)] if(length(xlevels) == 0) xlevels <- NULL } else xlevels <- NULL a <- attributes(m) Y <- model.extract(m, response) X <- model.matrix(Terms, m, contrasts) w <- model.extract(m, weights) if(!length(w)) w <- rep(1, nrow(m)) else if(any(w < 0)) stop("negative weights not allowed") start <- model.extract(m, start) offset <- model.extract(m, offset) family <- as.family(family) if(missing(method)) method <- attr(family, "method") if(!is.null(method)) { if(!existsFunction(method)) stop(paste("unimplemented method:", method)) } else method <- "glm.fit" glm.fitter <- get(method) fit <- glm.fitter(x = X, y = Y, w = w, start = start, offset = offset, family = family, maxit = control$maxit, epsilon = control$ epsilon, trace = control$trace, null.dev = T, ...) # # If an offset and intercept is present, iterations are needed to # compute the Null deviance; these are done here, unless the model # is NULL, in which case the computations have been done already # if(any(offset) && attr(Terms, "intercept")) { null.deviance <- if(length(Terms)) glm.fitter(X[, "(Intercept)", drop = F], Y, w, offset = offset, family = family, maxit = control$maxit, epsilon = control$epsilon, null.dev = NULL)$deviance else fit$deviance fit$null.deviance <- null.deviance } oldClass(fit) <- c("glm", "lm") if(!is.null(xlevels)) attr(fit, "xlevels") <- xlevels fit$terms <- Terms fit$formula <- as.vector(attr(Terms, "formula")) fit$call <- call if(model) fit$model <- m if(x) fit$x <- X if(!y) fit$y <- NULL fit$control <- control if(!is.null(attr(m, "na.action"))) fit$na.action <- attr(m, "na.action") if(smart) { fit$smart.prediction <- get.smart.prediction() # This is new } fit } attr(glm, "smart") <- TRUE survreg <- function(formula = formula(data), data = sys.parent(), subset, na.action, link = "log", dist = c("extreme", "logistic", "gaussian", "exponential", "rayleigh"), init = NULL, fixed = list(), control, model = F, x = F, y = T, warn.deprecated = T, smart=T, ...) { if(warn.deprecated) warning("The function survreg() has been deprecated and replaced by the function survReg(). Please modify your function calls to use survReg().") call <- match.call() m <- match.call(expand = F) if(smart) setup.smart("write") # This is new m$dist <- m$link <- m$model <- m$x <- m$y <- m$init <- m$... <- NULL m$start <- m$fixed <- m$control <- m$warn.deprecated <- NULL m[[1]] <- as.name("model.frame") m <- eval(m, sys.parent()) Terms <- attr(m, "terms") dist <- match.arg(dist) lnames <- dimnames(glm.links)[[2]] link <- pmatch(link, lnames, 0) if(link == 0) stop("Invalid link function") else link <- lnames[link] Y <- model.extract(m, "response") if(!inherits(Y, "Surv")) stop("Response must be a survival object") X <- model.matrix(Terms, m) n <- nrow(X) nvar <- ncol(X) offset <- attr(Terms, "offset") if(!is.null(offset)) offset <- as.numeric(m[[offset]]) else offset <- rep(0., n) type <- attr(Y, "type") linkfun <- glm.links["link", link][[1]] if(type == "counting") stop("Invalid survival type") else if(type == "interval") { if(any(Y[, 3] == 3)) Y <- cbind(linkfun(Y[, 1:2]), Y[, 3]) else Y <- cbind(linkfun(Y[, 1]), Y[, 3]) } else if(type == "left") Y <- cbind(linkfun(Y[, 1]), 2 - Y[, 2]) else Y <- cbind(linkfun(Y[, 1]), Y[, 2]) controlvals <- survreg.control() if(!missing(control)) controlvals[names(control)] <- control if(dist == "exponential") { fixed$scale <- 1 dist <- "extreme" } else if(dist == "rayleigh") { fixed$scale <- 0.5 dist <- "extreme" } sd <- survreg.distributions[[dist]] if(length(fixed) > 0) { ifix <- match(names(fixed), names(sd$parms), nomatch = 0) if(any(ifix == 0)) stop(paste("Parameter(s)", paste(names(fixed)[ifix == 0]), "in the fixed list not valid for this dist")) } if(is.list(init) && length(init) > 0) { ifix <- match(names(init), c("eta", names(sd$parms)), nomatch = 0) if(any(ifix == 0)) stop(paste("Parameter(s)", paste(names(init)[ifix == 0]), "in the init list not valid for this dist" )) } if(is.null(init) && link == "identity") { lsfit.out <- lsfit(X, Y[, 1], intercept = F) init <- c(lsfit.out$coef, scale = log(sqrt(var(lsfit.out$ residuals)))) if(length(fixed) > 0) { init <- init[ - match(names(fixed), names(init))] } } sfit <- survreg.fit(X, Y, offset, init = init, controlvals = controlvals, dist = dist, fixed = fixed) #error message if(is.character(sfit)) fit <- list(fail = sfit) else { # There may be more clever ways to do this, but .... # In order to make it look like IRLS, and get all the components # that I need for glm inheritance, do one step of weighted least # squares. eta <- c(X %*% sfit$coef[1:nvar]) + offset wt <- - sfit$deriv[, 3] fit <- lm.wfit(X, eta + sfit$deriv[, 2]/wt, wt, "qr", ...) ifun <- glm.links["inverse", link][[1]] fit$fitted.values <- ifun(fit$fitted.values) fit$family <- c(name = dist, link = link, "") fit$linear.predictors <- eta fit$iter <- sfit$iter fit$parms <- sfit$parms fit$df.residual <- fit$df.residual - sum(!sfit$fixed) # If singular.ok=T, there may be NA coefs. The var matrix should # be an inversion of the "non NA" portion. var <- 0 * sfit$imat good <- c(!is.na(fit$coef), rep(T, ncol(var) - nvar)) var[good, good] <- solve(qr(sfit$imat[good, good], tol = 1e-12) ) fit$var <- var fit$fixed <- sfit$fixed dev <- sd$deviance(Y, fit$parms, sfit$deriv[, 1]) fit$dresiduals <- sign(fit$residuals) * sqrt(dev) fit$deviance <- sum(dev) fit$null.deviance <- fit$deviance + 2 * (sfit$loglik[2] - sfit$ ndev[2]) fit$loglik <- c(sfit$ndev[2], sfit$loglik[2]) } na.action <- attr(m, "na.action") if(length(na.action)) fit$na.action <- na.action oldClass(fit) <- c("survreg", "glm", "lm") fit$terms <- Terms fit$formula <- as.vector(attr(Terms, "formula")) fit$call <- call if(model) fit$model <- m if(x) fit$x <- X if(y) fit$y <- Y if(smart) { fit$smart.prediction <- get.smart.prediction() # This is new } fit } attr(survreg, "smart") <- TRUE survReg <- function(formula = formula(data), data = sys.parent(), weights, subset, na.action, dist = "weibull", init = NULL, scale = 0, control, parms = NULL, model = F, x = F, y = T, smart=T, ...) { call <- match.call() m <- match.call(expand = F) if(smart) setup.smart("write") # This is new temp <- c("", "formula", "data", "weights", "subset", "na.action") m <- m[match(temp, names(m), nomatch = 0)] m[[1]] <- as.name("model.frame") special <- c("strata", "cluster") Terms <- if(missing(data)) terms(formula, special) else terms(formula, special, data = data) m$formula <- Terms m <- eval(m, sys.parent()) Terms <- attr(m, "terms") weights <- model.extract(m, "weights") Y <- model.extract(m, "response") if(inherits(Y, "censor")) Y <- censor(Y[, - ncol(Y)], Y[, ncol(Y)], inCodes = "1-4", outCodes = "0-3") if(!inherits(Y, "Surv")) stop("Response must be a survival object") strats <- attr(Terms, "specials")$strata cluster <- attr(Terms, "specials")$cluster dropx <- NULL if(length(cluster)) { if(missing(robust)) robust <- T tempc <- untangle.specials(Terms, "cluster", 1:10) ord <- attr(Terms, "order")[tempc$terms] if(any(ord > 1)) stop("Cluster can not be used in an interaction") cluster <- strata(m[, tempc$vars], shortlabel = T) #allow multiples dropx <- tempc$terms } if(length(strats)) { temp <- untangle.specials(Terms, "strata", 1) dropx <- c(dropx, temp$terms) if(length(temp$vars) == 1) strata.keep <- m[[temp$vars]] else strata.keep <- strata(m[, temp$vars], shortlabel = T) strata <- as.numeric(strata.keep) nstrata <- max(strata) } else { nstrata <- 1 strata <- 0 } if(length(dropx)) X <- model.matrix(Terms[ - dropx], m) else X <- model.matrix(Terms, m) n <- nrow(X) nvar <- ncol(X) offset <- attr(Terms, "offset") if(!is.null(offset)) offset <- as.numeric(m[[offset]]) else offset <- rep(0, n) if(is.character(dist)) { dlist <- survReg.distributions[[dist]] if(is.null(dlist)) stop(paste(dist, ": distribution not found")) } else if(is.list(dist)) dlist <- dist else stop("Invalid distribution object") if(is.null(dlist$dist)) { if(is.character(dlist$name) && is.function(dlist$init) && is.function(dlist$deviance)) { } else stop("Invalid distribution object") } else { if(!is.character(dlist$name) || !is.character(dlist$dist) || !is.function(dlist$trans) || !is.function(dlist$dtrans) ) stop("Invalid distribution object") } type <- attr(Y, "type") if(type == "counting") stop("Invalid survival type") logcorrect <- 0 #correction to the loglik due to transformations if(!is.null(dlist$trans)) { tranfun <- dlist$trans exactsurv <- Y[, ncol(Y)] == 1 if(any(exactsurv)) logcorrect <- sum(log(dlist$dtrans(Y[exactsurv, 1]))) if(type == "interval") { if(any(Y[, 3] == 3)) Y <- cbind(tranfun(Y[, 1:2]), Y[, 3]) else Y <- cbind(tranfun(Y[, 1]), Y[, 3]) } else if(type == "left") Y <- cbind(tranfun(Y[, 1]), 2 - Y[, 2]) else Y <- cbind(tranfun(Y[, 1]), Y[, 2]) if(!all(is.finite(Y))) stop("Invalid survival times for this distribution") } else { if(type == "left") Y[, 2] <- 2 - Y[, 2] else if(type == "interval" && all(Y[, 3] < 3)) Y < Y[, c(1, 3)] } if(is.null(dlist$itrans)) itrans <- function(x) x else itrans <- dlist$itrans if(!is.null(dlist$scale)) { if(!missing(scale)) warning(paste(dlist$name, "has a fixed scale, user specified value ignored")) scale <- dlist$scale } if(!is.null(dlist$dist)) dlist <- survReg.distributions[[dlist$dist]] if(missing(control)) control <- survReg.control(...) else control <- do.call("survReg.control", c(control, ...)) if(scale < 0) stop("Invalid scale value") if(scale > 0 && nstrata > 1) stop("Cannot have multiple strata with a fixed scale") # Check for penalized terms pterms <- sapply(m, inherits, "coxph.penalty") if(any(pterms)) { pattr <- lapply(m[pterms], attributes) # # the 'order' attribute has the same components as 'term.labels' # pterms always has 1 more (response), sometimes 2 (offset) # drop the extra parts from pterms temp <- c(attr(Terms, "response"), attr(Terms, "offset")) if(length(dropx)) temp <- c(temp, dropx + 1) pterms <- pterms[ - temp] temp <- match((names(pterms))[pterms], attr(Terms, "term.labels")) ord <- attr(Terms, "order")[temp] if(any(ord > 1)) stop("Penalty terms cannot be in an interaction") pcols <- (attr(X, "assign")[-1])[pterms] fit <- survpenal.fit(X, Y, weights, offset, init = init, controlvals = control, dist = dlist, scale = scale, strata = strata, nstrat = nstrata, pcols, pattr, parms = parms) } else fit <- survReg.fit(X, Y, weights, offset, init = init, controlvals = control, dist = dlist, scale = scale, nstrat = nstrata, strata, parms = parms) #error message if(is.character(fit)) fit <- list(fail = fit) else { if(scale == 0) { nvar <- length(fit$coef) - nstrata fit$scale <- exp(fit$coef[ - (1:nvar)]) if(nstrata == 1) names(fit$scale) <- NULL else names(fit$scale) <- levels(strata.keep) fit$coefficients <- fit$coefficients[1:nvar] fit$idf <- 1 + nstrata } else { fit$scale <- scale fit$idf <- 1 } fit$loglik <- fit$loglik + logcorrect } na.action <- attr(m, "na.action") if(length(na.action)) fit$na.action <- na.action fit$df.residual <- n - sum(fit$df) fit$terms <- Terms fit$formula <- as.vector(attr(Terms, "formula")) fit$means <- apply(X, 2, mean) fit$call <- call fit$dist <- dist if(model) fit$model <- m if(x) fit$x <- X if(y) fit$y <- Y if(length(parms)) fit$parms <- parms if(any(pterms)) oldClass(fit) <- "survReg.penal" else oldClass(fit) <- "survReg" if(smart) { fit$smart.prediction <- get.smart.prediction() # This is new } fit } attr(survReg, "smart") <- TRUE gls <- ## fits linear model with serial correlation and variance functions, ## by maximum likelihood using a Newton-Raphson algorithm. function(model, data = sys.parent(), correlation = NULL, weights = NULL, subset, method = c("REML", "ML"), na.action = na.fail, control = list(), verbose = F, smart=T) { Call <- match.call() ## control parameters controlvals <- glsControl() controlvals[names(control)] <- control if(smart) { setup.smart("write") # This is new } ## ## checking arguments ## if(!inherits(model, "formula") || length(model) != 3) { stop("\nModel must be a formula of the form \"resp ~ pred\"") } method <- match.arg(method) REML <- method == "REML" ## check if correlation is present and has groups if(!is.null(correlation)) { groups <- getGroupsFormula(correlation) corStr <- getStrataFormula(correlation) } else { groups <- NULL corStr <- NULL } ## create a gls structure containing the plug-ins glsSt <- glsStruct(corStruct = correlation, varStruct = varFunc(weights )) ## extract a data frame with enough information to evaluate ## formula, groups, corStruct, and varStruct mfArgs <- list(formula = asOneFormula(formula(glsSt), model, groups, corStr), data = data, na.action = na.action) if(!missing(subset)) { mfArgs[["subset"]] <- asOneSidedFormula(Call[["subset"]])[[ 2]] } dataMod <- do.call("model.frame", mfArgs) origOrder <- row.names(dataMod) # preserve the original order if(!is.null(groups)) { ## sort the model.frame by groups and get the matrices and parameters ## used in the estimation procedures ## always use innermost level of grouping groups <- eval(parse(text = paste("~1", deparse(groups[[2]]), sep = "|"))) grps <- getGroups(dataMod, groups, level = length( getGroupsFormula(groups, asList = T))) ## ordering data by groups ord <- order(grps) grps <- grps[ord] dataMod <- dataMod[ord, , drop = F] revOrder <- match(origOrder, row.names(dataMod)) } else grps <- NULL ## obtaing basic model matrices X <- model.frame(model, dataMod) ## keeping the contrasts for later use in predict contr <- lapply(X, function(el) if(inherits(el, "factor")) contrasts(el)) contr <- contr[!unlist(lapply(contr, is.null))] X <- model.matrix(model, X) y <- eval(model[[2]], dataMod) N <- nrow(X) p <- ncol(X) # number of coefficients parAssign <- attr(X, "assign") ## creating the condensed linear model attr(glsSt, "conLin") <- list(Xy = array(c(X, y), c(N, ncol(X) + 1), list(row.names(dataMod), c(dimnames(X)[[2]], deparse(model[[ 2]])))), dims = list(N = N, p = p, REML = as.integer(REML)), logLik = 0, sigma = controlvals$sigma) ## initialization glsEstControl <- controlvals[c("singular.ok", "qrTol")] glsSt <- initialize(glsSt, dataMod, glsEstControl) parMap <- attr(glsSt, "pmap") ## ## getting the fitted object, possibly iterating for variance functions ## numIter <- numIter0 <- 0 attach(controlvals) repeat { oldPars <- c(attr(glsSt, "glsFit")[["beta"]], coef(glsSt)) if(length(coef(glsSt))) { # needs ms() aMs <- ms( ~ - logLik(glsSt, glsPars), start = list( glsPars = c(coef(glsSt))), control = list( rel.tolerance = msTol, maxiter = msMaxIter, scale = msScale), trace = msVerbose) coef(glsSt) <- aMs$parameters numIter0 <- aMs$numIter <- aMs$flags[31] } attr(glsSt, "glsFit") <- glsEstimate(glsSt, control = glsEstControl) ## checking if any updating is needed if(!needUpdate(glsSt)) break ## updating the fit information numIter <- numIter + 1 glsSt <- update(glsSt, dataMod) ## calculating the convergence criterion aConv <- c(attr(glsSt, "glsFit")[["beta"]], coef(glsSt)) conv <- abs((oldPars - aConv)/ifelse(aConv == 0, 1, aConv)) aConv <- c(beta = max(conv[1:p])) conv <- conv[ - (1:p)] for(i in names(glsSt)) { if(any(parMap[, i])) { aConv <- c(aConv, max(conv[parMap[, i]])) names(aConv)[length(aConv)] <- i } } if(verbose) { cat("\nIteration:", numIter) if(length(coef(glsSt)) > 0) { cat("\nObjective:", format(aMs$value), ", ms iterations:", aMs$numIter, "\n") print(glsSt) } cat("\nConvergence:\n") print(aConv) } if(max(aConv) <= tolerance) { break } if(numIter > maxIter) { stop("Maximum number of iterations reached without convergence." ) } } detach() ## wrapping up glsFit <- attr(glsSt, "glsFit") namBeta <- names(glsFit$beta) attr(parAssign, "varBetaFact") <- varBeta <- glsFit$sigma * glsFit$ varBeta * sqrt((N - REML * p)/(N - p)) varBeta <- crossprod(varBeta) dimnames(varBeta) <- list(namBeta, namBeta) ## ## fitted.values and residuals (in original order) ## Fitted <- fitted(glsSt) ## putting groups back in original order, if present if(!is.null(grps)) { grps <- grps[revOrder] Fitted <- Fitted[revOrder] Resid <- y[revOrder] - Fitted attr(Resid, "std") <- glsFit$sigma/(varWeights(glsSt)[revOrder] ) } else { Resid <- y - Fitted attr(Resid, "std") <- glsFit$sigma/(varWeights(glsSt)) } ## getting the approximate var-cov of the parameters if(controlvals$apVar) { apVar <- glsApVar(glsSt, glsFit$sigma, .relStep = controlvals[[ ".relStep"]], minAbsPar = controlvals[["minAbsParApVar" ]], natural = controlvals[["natural"]], natUncons = controlvals[["natUnconstrained"]]) } else { apVar <- "Approximate variance-covariance matrix not available" } ## getting rid of condensed linear model and fit dims <- attr(glsSt, "conLin")[["dims"]] dims[["p"]] <- p attr(glsSt, "conLin") <- NULL attr(glsSt, "glsFit") <- NULL attr(glsSt, "fixedSigma") <- (controlvals$sigma > 0) ## ## creating the gls object ## estOut <- list(modelStruct = glsSt, dims = dims, contrasts = contr, coefficients = glsFit[["beta"]], varBeta = varBeta, sigma = glsFit$sigma, apVar = apVar, logLik = glsFit$logLik, numIter = if(needUpdate(glsSt)) numIter else numIter0, groups = grps, call = Call, method = method, fitted = Fitted, residuals = Resid, parAssign = parAssign) if(inherits(data, "groupedData")) { ## saving labels and units for plots attr(estOut, "units") <- attr(data, "units") attr(estOut, "labels") <- attr(data, "labels") } if(!is.null(grps) && any(diff(ord) != 1)) { attr(estOut, "order") <- list(order = ord, revOrder = revOrder) } if(smart) { estOut$smart.prediction <- get.smart.prediction() # This is new } attr(estOut, "namBetaFull") <- dimnames(X)[[2]] oldClass(estOut) <- "gls" estOut } attr(gls, "smart") <- TRUE predict.gls <- function(object, newdata, na.action = na.fail) { ## ## method for predict() designed for objects inheriting from oldClass gls ## if(missing(newdata) || is.null(newdata)) { # will return fitted values return(fitted(object)) } if(length(object$smart.prediction)) { # Smart prediction: handle the prediction flaw setup.smart("read", smart.prediction=object$smart.prediction) } form <- getCovariateFormula(object) mfArgs <- list(formula = form, data = newdata, na.action = na.action) dataMod <- do.call("model.frame", mfArgs) ## making sure factor levels are the same as in contrasts contr <- object$contrasts for(i in names(dataMod)) { if(inherits(dataMod[, i], "factor") && !is.null(contr[[i]])) { levs <- levels(dataMod[, i]) levsC <- dimnames(contr[[i]])[[1]] if(any(wch <- is.na(match(levs, levsC)))) { stop(paste("Levels", paste(levs[wch], collapse = ","), "not allowed for", i)) } # attr(dataMod[,i], "contrasts") <- contr[[i]][levs, , drop = FALSE] contr[[i]] <- contr[[i]][levs, , drop = FALSE] } } N <- nrow(dataMod) if(length(all.vars(form)) > 0) { X <- model.matrix(form, dataMod, contr) } else { X <- array(1, c(N, 1), list(row.names(dataMod), "(Intercept)")) } cf <- coef(object) val <- c(X[, names(cf), drop = F] %*% cf) attr(val, "label") <- "Predicted values" if(!is.null(aux <- attr(object, "units")$y)) { attr(val, "label") <- paste(attr(val, "label"), aux) } if(length(object$smart.prediction)) { wrapup.smart() # not needed actually } val } attr(predict.gls, "smart") <- TRUE # New functions ========================================================= smart.mode.is <- function(mode.arg) { if(missing(mode.arg)) { if(exists(".smart.prediction", frame=1)) { get(".smart.prediction.mode", frame=1) } else { "neutral" } } else { if(exists(".smart.prediction", frame=1)) { get(".smart.prediction.mode", frame=1)==mode.arg } else { mode.arg=="neutral" } } } setup.smart <- function(mode.arg, smart.prediction=NULL, max.smart=30) { actual <- if(mode.arg=="write") vector("list", max.smart) else if(mode.arg=="read") smart.prediction else stop("value of mode.arg unrecognized") assign(".smart.prediction", actual, frame=1) assign(".smart.prediction.counter", 0, frame=1) assign(".smart.prediction.mode", mode.arg, frame=1) assign(".max.smart", max.smart, frame=1) } wrapup.smart <- function() { # not needed actually remove(".smart.prediction", frame=1) remove(".smart.prediction.counter", frame=1) remove(".smart.prediction.mode", frame=1) remove(".max.smart", frame=1) } get.smart.prediction <- function() { smart.prediction.counter <- get(".smart.prediction.counter", frame=1) max.smart <- get(".max.smart", frame=1) if(smart.prediction.counter > 0) { # Save this on the object for smart prediction later smart.prediction <- get(".smart.prediction", frame=1) if(max.smart >= (smart.prediction.counter+1)) for(i in max.smart:(smart.prediction.counter+1)) smart.prediction[[i]] <- NULL smart.prediction } else NULL } put.smart <- function(smart) { # Puts the info, if possible, in frame 1. # Doesn't returns whether it did it or not. # if(ret <- smart.mode.is("write")) { # Write the info to frame 0 as well max.smart <- get(".max.smart", frame=1) smart.prediction.counter <- get(".smart.prediction.counter", frame=1) smart.prediction <- get(".smart.prediction", frame=1) smart.prediction.counter <- smart.prediction.counter + 1 if(smart.prediction.counter > max.smart) { # if list is too small, make it larger max.smart <- max.smart + (inc.smart <- 10) # can change inc.smart smart.prediction <- c(smart.prediction, vector("list", inc.smart)) assign(".max.smart", max.smart, frame=1) } smart.prediction[[smart.prediction.counter]] <- smart assign(".smart.prediction", smart.prediction, frame=1) assign(".smart.prediction.counter", smart.prediction.counter, frame=1) # } # ret } get.smart <- function() { # Returns one list component of information smart.prediction <- get(".smart.prediction", frame=1) smart.prediction.counter <- get(".smart.prediction.counter", frame=1) smart.prediction.counter <- smart.prediction.counter + 1 assign(".smart.prediction.counter", smart.prediction.counter, frame=1) smart <- smart.prediction[[smart.prediction.counter]] smart } smart.expression <- expression({ # This expression only works if the first argument of the smart # function is "x", e.g., smartfun(x, ...) smart <- get.smart() assign(".smart.prediction.mode", "neutral", frame=1) ans.smart <- do.call(as.character(match.call()[1]), c(list(x=x), smart)) assign(".smart.prediction.mode", "read", frame=1) ans.smart }) is.smart <- function(fun) { if(is.logical(a <- attr(fun, "smart"))) a else FALSE } # # Smart functions below here ========================================== bs <- function(x, df = NULL, knots = NULL, degree = 3, intercept = F, Boundary.knots = NULL, derivs = NULL) { # Notes. # 1. the Boundary.knots are actually range(x), but are NULL here # because of smart prediction. # 2. the derivs are actually rep(0, length(x)), but are NULL here # because of smart prediction. # 3. This function is difficult to make smart. There may be bugs!! # I've changed "missing" to "is.null" x <- x # Evaluate x if(is.null(Boundary.knots)) Boundary.knots <- range(x) if(is.null(derivs)) derivs <- rep(0, length(x)) if(smart.mode.is("read")) { return(eval(smart.expression)) } nx <- names(x) x <- as.vector(x) nax <- is.na(x) if(nas <- any(nax)) x <- x[!nax] ord <- degree + 1 if(!is.null(Boundary.knots)) { if(length(Boundary.knots) == 1 && Boundary.knots == T) { Boundary.knots <- range(knots) knots <- sort(knots)[ - c(1, length(knots))] } Boundary.knots <- range(Boundary.knots) outside <- (ol <- x < Boundary.knots[1]) | (or <- x > Boundary.knots[2]) } else outside <- rep(F, length = length(x)) if(!is.null(df) && is.null(knots)) { nIknots <- df - ord + (1 - intercept) if(nIknots < 0) { nIknots <- 0 warning(paste("df was too small; have used ", ord - (1 - intercept))) } if(nIknots > 0) { knots <- seq(from = 0, to = 1, length = nIknots + 2)[ - c(1, nIknots + 2)] knots <- quantile(x[!outside], knots) } else knots <- NULL } Aknots <- sort(c(rep(Boundary.knots, ord), knots)) if(!is.null(derivs) && length(derivs) == 1) derivs <- rep(derivs, length(x)) if(any(outside)) { warning("Some x values beyond boundary knots may cause ill-conditioned bases") scalef <- c(1, cumprod(seq(degree))) basis <- array(0, c(length(x), length(Aknots) - ord)) if(any(ol)) { k.pivot <- Boundary.knots[1] xl <- cbind(1, outer(x[ol] - k.pivot, 1:degree, "^")) tt <- spline.des(Aknots, rep(k.pivot, ord), ord, 0: degree)$design basis[ol, ] <- xl %*% (tt/scalef) if(!is.null(derivs) && any(derivs[ol] > 0)) { for(j in 1:degree) { if(!is.element(j, derivs[ol])) next basis[ol & derivs == j, ] <- xl[derivs[ ol] == j, , drop = F] %*% ( rbind(tt[ - (1:j), , drop = F], matrix(0, j, ncol(tt)))/scalef) } basis[ol & derivs > degree, ] <- 0 } } if(any(or)) { k.pivot <- Boundary.knots[2] xr <- cbind(1, outer(x[or] - k.pivot, 1:degree, "^")) tt <- spline.des(Aknots, rep(k.pivot, ord), ord, 0: degree)$design basis[or, ] <- xr %*% (tt/scalef) if(!is.null(derivs) && any(derivs[or] > 0)) { for(j in 1:degree) { if(!is.element(j, derivs[ol])) next basis[or & derivs == j, ] <- xr[derivs[ or] == j, , drop = F] %*% ( rbind(tt[ - (1:j), , drop = F], matrix(0, j, ncol(tt)))/scalef) } basis[or & derivs > degree, ] <- 0 } } if(any(inside <- !outside)) basis[inside, ] <- spline.des(Aknots, x[inside], ord, derivs[inside])$design } else basis <- spline.des(Aknots, x, ord, derivs)$design if(!intercept) basis <- basis[, -1, drop = F] n.col <- ncol(basis) if(nas) { nmat <- matrix(NA, length(nax), n.col) nmat[!nax, ] <- basis basis <- nmat } dimnames(basis) <- list(nx, 1:n.col) a <- list(degree = degree, knots = knots, Boundary.knots = Boundary.knots, intercept = intercept, class = c("bs", "basis")) attributes(basis) <- c(attributes(basis), a) if(smart.mode.is("write")) { if(any(derivs != derivs[1])) stop("can't handle unequal derivs values when smart predicting") # largely for convenience because only first value needs to be saved # Could save the whole derivs vector, but recycling would be needed # later when predicting at a different sized data put.smart(list(degree=degree, intercept=intercept, df=df, knots=knots, Boundary.knots=Boundary.knots, derivs = derivs[1])) } basis } attr(bs, "smart") <- TRUE ns <- function(x, df = NULL, knots = NULL, intercept = F, Boundary.knots = NULL, derivs = NULL) { # See comments about smart prediction is bs() x <- x # Evaluate x if(is.null(Boundary.knots)) Boundary.knots <- range(x) if(is.null(derivs)) derivs <- rep(0, length(x)) if(smart.mode.is("read")) { if(!all(derivs == derivs[1])) stop("can't handle unequal derivs when smart predicting") # because length(x) is different! return(eval(smart.expression)) } nx <- names(x) x <- as.vector(x) nax <- is.na(x) if(nas <- any(nax)) x <- x[!nax] if(!is.null(Boundary.knots)) { if(length(Boundary.knots) == 1 && Boundary.knots == T) { Boundary.knots <- range(knots) knots <- sort(knots)[ - c(1, length(knots))] } Boundary.knots <- range(Boundary.knots) outside <- (ol <- x < Boundary.knots[1]) | (or <- x > Boundary.knots[2]) } else outside <- rep(F, length = length(x)) if(!is.null(df) && is.null(knots)) { # df = number(interior knots) + 1 + intercept nIknots <- df - 1 - intercept if(nIknots < 0) { nIknots <- 0 warning(paste("df was too small; have used ", 1 + intercept)) } if(nIknots > 0) { knots <- seq(from = 0, to = 1, length = nIknots + 2)[ - c(1, nIknots + 2)] knots <- quantile(x[!outside], knots) } else knots <- NULL } Aknots <- sort(c(rep(Boundary.knots, 4), knots)) if(!is.null(derivs) && length(derivs) == 1) derivs <- rep(derivs, length(x)) if(any(outside)) { basis <- array(0, c(length(x), length(knots) + 4)) if(any(ol)) { k.pivot <- Boundary.knots[1] xl <- cbind(1, x[ol] - k.pivot) tt <- spline.des(Aknots, rep(k.pivot, 2), 4, c(0, 1))$ design basis[ol, ] <- xl %*% tt if(!is.null(derivs) && any(derivs[ol] > 0)) { basis[ol & derivs == 1, ] <- rep(tt[2, ], each = sum(ol & derivs == 1)) basis[ol & derivs > 1, ] <- 0 } } if(any(or)) { k.pivot <- Boundary.knots[2] xr <- cbind(1, x[or] - k.pivot) tt <- spline.des(Aknots, rep(k.pivot, 2), 4, c(0, 1))$ design basis[or, ] <- xr %*% tt if(!is.null(derivs) && any(derivs[or] > 0)) { basis[or & derivs == 1, ] <- rep(tt[2, ], each = sum(or & derivs == 1)) basis[or & derivs > 1, ] <- 0 } } if(any(inside <- !outside)) basis[inside, ] <- spline.des(Aknots, x[inside], 4, derivs[inside])$design } else basis <- spline.des(Aknots, x, 4, derivs)$design const <- spline.des(Aknots, Boundary.knots, 4, c(2, 2))$design if(!intercept) { const <- const[, -1, drop = F] basis <- basis[, -1, drop = F] } qr.const <- qr(t(const)) basis <- as.matrix((t(qr.qty(qr.const, t(basis))))[, - (1:2)]) n.col <- ncol(basis) if(nas) { nmat <- matrix(NA, length(nax), n.col) nmat[!nax, ] <- basis basis <- nmat } dimnames(basis) <- list(nx, 1:n.col) a <- list(degree = 3, knots = knots, Boundary.knots = Boundary.knots, intercept = intercept, class = c("ns", "basis")) attributes(basis) <- c(attributes(basis), a) if(smart.mode.is("write")) { if(any(derivs != derivs[1])) stop("can't handle unequal derivs values when smart predicting") put.smart(list(df=ncol(basis), knots=knots, intercept=intercept, Boundary.knots=Boundary.knots, derivs = derivs[1])) } basis } attr(ns, "smart") <- TRUE predict.bs <- function(object, newx, ..., derivs) { if(length(list(...))) stop("unrecognized arguments") if(missing(newx)) { if(!missing(derivs)) stop("Cannot calculated derivatives unless newx is supplied") return(object) } a <- c(list(x = newx), attributes(object)[c("degree", "knots", "Boundary.knots", "intercept")], if(!missing(derivs)) list( derivs = derivs)) do.call("bs", a) } attr(predict.bs, "smart") <- TRUE predict.ns <- function(object, newx, ..., derivs) { if(length(list(...))) stop("unrecognized arguments") if(missing(newx)) { if(!missing(derivs)) stop("Cannot calculated derivatives unless newx is supplied") return(object) } a <- c(list(x = newx), attributes(object)[c("knots", "Boundary.knots", "intercept")], if(!missing(derivs)) list(derivs = derivs)) do.call("ns", a) } attr(predict.ns, "smart") <- TRUE poly.raw <- function(x, degree = 1, xname = NULL, coefs=NULL) { # New: coefs set to equal NULL by default (previously uninitialized) x <- x # Evaluate x if(smart.mode.is("read")) { smart <- get.smart() degree <- smart$degree # Overwrite its value coefs <- smart$coefs # Overwrite its value xname <- smart$xname # Overwrite its value } if(degree < 1) stop("bad value for degree") namesx <- names(x) nax <- is.na(x) if(nas <- any(nax)) x <- x[!nax] n <- length(x) xname <- if(is.null(xname)) as.character(1:degree) else paste(xname, 1:degree, sep = "^") if(is.null(coefs)) { if(degree >= n) stop("degree cannot exceed length(x)-1") alpha <- double(degree) norm2 <- rep(as.double(0), degree + 2) } else { alpha <- coefs$alpha norm2 <- coefs$norm2 } mat <- .Fortran("orth", as.double(x), p = double(n * (degree + 2)), as.integer(n), as.integer(degree), alpha = alpha, norm2 = norm2) coefs <- mat[c("alpha", "norm2")] mat <- mat$p[-1:(-2 * n)] if(smart.mode.is("write")) put.smart(list(degree=degree, xname=xname, coefs=coefs)) if(nas) { nmat <- structure(array(NA, c(length(nax), degree), list(namesx, xname)), degree = 1:degree, coefs = coefs) nmat[!nax, ] <- mat nmat } else structure(array(mat, c(n, degree), list(namesx, xname)), degree = 1:degree, coefs = coefs) } attr(poly.raw, "smart") <- TRUE poly <- poly attr(poly, "smart") <- TRUE # For the User's point of view scale <- function(x, center = T, scale.arg = T) { if(is.data.frame(x)) x <- numerical.matrix(x) else if(!is.matrix(x)) x <- as.matrix(x) center.smart <- center scale.smart <- scale.arg if(smart.mode.is("read")) { return(eval(smart.expression)) } d <- dim(x) n <- d[1] p <- d[2] if(!is.logical(center)) { if(length(center) != p) stop("length of center must match number of columns of x") x <- x - rep(center, each = n) center.smart <- center } else if(center) x <- x - rep(center.smart <- colMeans(x, na.rm = T), each = n) if(!is.logical(scale.arg)) { if(length(scale.arg) != p) stop("length of scale must match number of columns of x") x <- x/rep(scale.smart <- scale.arg, each = n) } else if(scale.arg) { # I've changed scale.smart <- if(missing(center)) ... scale.smart <- if(is.logical(center) && center) colStdevs(x, na.rm = T) else sqrt(colSums(x^2, na.rm = T)/pmax(0, n - 1 - (if(length(w <- which.na(x))) tabulate(ceiling( w/n), p) else 0))) x <- x/rep(scale.smart, each = n) } if(smart.mode.is("write")) { put.smart(list(center=center.smart, scale.arg=scale.smart)) } x } attr(scale, "smart") <- TRUE # # Home baked smart functions below here =============================== # "my1" is an example "my1" <- function(x, minx=min(x)) { x <- x # Evaluate x if(smart.mode.is("read")) { smart <- get.smart() minx <- smart$minx # Overwrite its value } else if(smart.mode.is("write")) put.smart(list(minx=minx)) (x-minx)^2 } attr(my1, "smart") <- TRUE # "my2" is equivalent to "my1". This format is useful if there are # many arguments "my2" <- function(x, minx=min(x)) { x <- x # Evaluate x if(smart.mode.is("read")) { return(eval(smart.expression)) } else if(smart.mode.is("write")) put.smart(list(minx=minx)) (x-minx)^2 } attr(my2, "smart") <- TRUE # stdze1 is another (useful) example. It standardizes a variable. "stdze1" <- function(x, center=TRUE, scale=TRUE) { x <- x # Evaluate x if(!is.vector(x)) stop("x must be a vector") if(smart.mode.is("read")) { smart <- get.smart() return((x-smart$center)/smart$scale) } if(is.logical(center)) center <- if(center) mean(x) else 0 if(is.logical(scale)) scale <- if(scale) sqrt(var(x)) else 1 if(smart.mode.is("write")) put.smart(list(center=center, scale=scale)) # Normal use (x-center)/scale } attr(stdze1, "smart") <- TRUE "stdze2" <- function(x, center=TRUE, scale=TRUE) { x <- x # Evaluate x if(!is.vector(x)) stop("x must be a vector") if(smart.mode.is("read")) { return(eval(smart.expression)) } if(is.logical(center)) center <- if(center) mean(x) else 0 if(is.logical(scale)) scale <- if(scale) sqrt(var(x)) else 1 if(smart.mode.is("write")) put.smart(list(center=center, scale=scale)) (x-center)/scale } attr(stdze2, "smart") <- TRUE