### File of examples to be run for lecture slides date() rm(list=ls()) opSys <- Sys.info()[1] if (opSys=="Windows"){ filePrefix <- "C:/dscott/Teaching/782/Lectures/RExamples" }else{ filePrefix <- "~/Teaching/782/Lectures/RExamples/" } setwd(filePrefix) ### ### Basic S concepts ### ### Calculation 1 + 2 1/2 17^2 ### Grouping 1 + 2 * 3 (1 + 2) * 3 ### Functions sqrt(2) log(10) qnorm(.975) ### Assignment z = 17 z rm(z) z z <- 17 z rm(z) z 17 -> z z ### Using variables z <- 17 z <- z + 23 z ### Special values 1/0 -1/0 0/0 100 + Inf 100 + NaN 100/Inf Inf/Inf ### ### Practicalities ### ### Help help(solve) ?solve help(if) help("if") help.search("unix") help(package="HyperbolicDist") example(system.time) ### Assignments and expressions 1:10 x <- 1:10 (x <- 10:20) ### ### Data structures ### ### Examples with objects ### Coercion z <- 0:9 z z <- as.character(z) z z <- as.integer(z) z ### Null objects x <- numeric() x mode(x) length(x) x <- character() x mode(x) length(x) x <- list() x mode(x) length(x) x <- list(NULL) x mode(x) length(x) rm(x) x[1] <- 3 x <- numeric() x[10] <- 3 x ### Set attributes x <- rpois(20,1) x attr(x,"dim") <- c(4,5) x attr(x,"dim") <- c(5,4) x row.names(x) <- letters[1:(dim(x)[1])] row.names(x) x x <- as.data.frame(x) x names(x) <- LETTERS[1:dim(x)[2]] x y <- unclass(x) y str(x) str(y) attributes(x) attributes(y) head(x,2) head(y,2) args(rpois) ### Methods methods(head) head.matrix head.default getS3method("head","default") getAnywhere(head.default) utils:::head.default methods(plot) getS3method("plot","table") ### Factors state <- c("tas","sa","qld","nsw","nsw","nt","wa","wa", "qld","vic","nsw","vic","qld","qld","sa","tas", "sa","nt","wa","vic","qld","nsw","nsw","wa", "sa","act","nsw","vic","vic","act") statef <- factor(state) statef levels(statef) incomes <- c(60,49,40,61,64,60,59,54,62,69,70,42,56, 61,61,61,58,51,48,65,49,49,41,48,52,46, 59,46,58,43) incmeans <- tapply(incomes,statef,mean) round(incmeans,2) example(factor) statefr <- table(statef) statefr incomef <- factor(cut(incomes,breaks=35+10*(0:7))) table(incomef,statef) ### Lists Lst <- list(name="Fred",wife="Mary",no.children=3, child.ages=c(4,7,9)) Lst str(Lst) Lst$name Lst[[1]] Lst[["name"]] Lst[1] Lst[[4]] Lst[4] ### Data frames accountants <- data.frame(home=statef,loot=incomes, shot=incomef) accountants table(home) attach(accountants) table(home) detach(accountants) ### ### Vectors ### x <- c(1, 2, 4, 3, 1) x 2 * x x/4 x <- c(1, 2, 4, 3, 1) length(x) mode(x) ### Subsets x <- c(1, 2, 4, 3, 1) x[5] x[3] x[c(1, 2, 3)] x[-5] x[x > 2] x[1:2] <- 10 x ### Sequences 1:10 10:1 options(width=40) 1:50 options(width=70) seq(0, 5, by = 0.2) seq(0, 4, length = 9) rep(1:3, 5) rep(1:3, c(3, 4, 3)) ### Arithmetic 1:3 + 1:6 x <- 1:6 sum(x) prod(x) min(x) max(x) range(x) prod(1:10) prod(1:200) prod(1:10) / prod(1:3, 1:7) cumsum(1:10) cumprod(1:10) ### Statistical summaries x <- 1:10 mean(x) median(x) var(x) sd(x) quantile(1:10) quantile(1:10, 0:10/10) ### Character strings greeting <- "hello, world" greeting number <- "21" number c(greeting, number) c(greeting, 100) "100" + 10 ### Names x <- 1:5 x l <- c("a","b","c","d","e") l names(x) <- l x x[4:5] x[c("a","d")] ### ### Matrices ### A <- matrix(1:6, nrow = 3, ncol = 2) A B <- matrix(1:6, nrow = 3, ncol = 2, byrow = TRUE) B A <- matrix(1:6, nrow = 3, ncol = 2) nrow(A) ncol(A) dim(A) ### Special matrices A <- diag(1:3) A diag(A) diag(3) matrix(1, nrow = 3, ncol = 3) ### Matrix arithmetic A <- matrix(1:6, nrow = 3, ncol = 2) B <- matrix(1:6, nrow = 3, ncol = 2, byrow = TRUE) A + B A t(A) t(A) %*% B A <- matrix(c(1, 3, 2, 4), ncol = 2) b <- c(1, 1) solve(A, b) solve(A) A %*% solve(A) ### Regression x <- matrix(rnorm(6), nrow = 3) y <- rnorm(3) n <- nrow(x) p <- ncol(x) betahat <- solve(t(x) %*% x, t(x) %*% y) epsilonhat <- y - x %*% betahat sigmahat2 <- sum(epsilonhat^2) / (n - p) D <- sigmahat2 * solve(t(x) %*% x) ### Matrix decompositions A <- matrix(c(1, 3, 2, 4), ncol = 2) eigen(A) svd(A) qr(A) ### Subsetting A <- matrix(1:6, ncol=3) A A[1,2] A[1:2,1:2] A[,2:3] A[2,] ### Row and column indices A <- matrix(1:6, ncol=3) row(A) col(A) A <- matrix(1:9, ncol=3) A[row(A) > col(A)] <- 0 A ### Labels A <- matrix(1:6, nrow=3) dimnames(A) <- list(c("sex","drugs","rock&roll"), c("this", "that")) A dimnames(A) <- list(what=c("sex","drugs","rock&roll"), which=c("this", "that")) A ### Multiway arrays (A <- array(1:5,c(2,3,4))) aperm(A) aperm(A,c(2,1,3)) ### ### Text handling ### print("A Title") print(matrix(1:4,nc=2)) print(c("Hello", "there"), quote=FALSE) print(1/1:5, digits=2) print(c(1, NA, Inf), na.print=".") R2 <- .7863 cat("aaa", "bbb", "\n") cat("aaa", "bbb", "\n", sep="") cat("a","b","c","d","\n",sep=" - ") cat("R-squared =", R2, "\n") letters cat("The alphabet: ", letters,"\n",sep="") round(runif(5), 2) signif(runif(5), 2) formatC(1/3, format="f", digits=4) formatC(1/3, format="e", digits=4) paste("Var", 1:4) paste("Var", 1:4, sep="-", collapse=":") ### Latex tables x <- matrix(1:4, nc=2, dimnames=list(c("a", "b"), c("c", "d"))) latex.table <- function(x) { rlabs <- dimnames(x)[[1]] clabs <- dimnames(x)[[2]] fmt <- c("l", rep("r", length(clabs))) cat("\\begin{tabular}[", fmt, "]\n", sep="") cat(" ", clabs, sep = " & ") cat(" \\\\\n") for(i in 1:nrow(x)) { cat(rlabs[i], x[i,], sep=" & ") cat(" \\\\\n") } cat("\\end{tabular}\n") } x latex.table(x) ### Substrings substr("abcdef", 2, 4) substring("abcdef", 1:6, 1:6) substr(rep("abcdef",4), 1:4, 4:5) substr("abcdef", 1:4, 4:5) substring("abcdef", 1:4, 4:5) x <- "123456789" substring(x, 2) <- "aaa" x x <- "123456789" substring(x, 2, 3) <- "aaa" x strsplit("hello there world", " ") strsplit("hello there\tworld", " ") strsplit("hello there\tworld", "[[:space:]]+") x <- "all the king's men" x <- unlist(strsplit(x, " ")) x grep("e", x) x[grep("e", x)] x <- "The color of money colors my decision" sub("color", "colour", x) gsub("color", "colour", x) x <- "all the king's men" x <- unlist(strsplit(x, " ")) x gsub("(.*)", "\\1=\\1", x) ### ### Importing and exporting data ### ### Exporting data args(write.table) library(MASS) data(hills) write.table(hills,"hills.txt",col.names=NA) ?write.csv ### Importing data library(HyperbolicDist) data(package="HyperbolicDist") data(resistors,package="HyperbolicDist") data(mamquam,package="HyperbolicDist") mamquamNew <- edit(mamquam) new.df <- edit(data.frame()) ### ### Programming ### ### Solving a quadratic equation ### Specify coefficients a, b, c = const a <- 1; b <- 2; const <- 1 Delta <- b^2 - 4*a*const # the discriminant if ( Delta < 0){ soln <- c(NA,NA) cat("No real solutions to this quadratic equation\n") }else{ soln <- c(-b + sqrt(Delta), -b - sqrt(Delta))/(2*a) } soln ### Second example a <- 1; b <- 2; const <- 4 Delta <- b^2 - 4*a*const # the discriminant if ( Delta < 0){ soln <- c(NA,NA) cat("No real solutions to this quadratic equation\n") }else{ soln <- c(-b + sqrt(Delta), -b - sqrt(Delta))/(2*a) } soln ### Third example a <- 1; b <- 2; const <- -4 Delta <- b^2 - 4*a*const # the discriminant if ( Delta < 0){ soln <- c(NA,NA) cat("No real solutions to this quadratic equation\n") }else{ soln <- c(-b + sqrt(Delta), -b - sqrt(Delta))/(2*a) } soln ### ifelse x <- c(3:-3) sqrt(x) # gives warning sqrt(ifelse(x >= 0, x, NA)) ### for ### Calculate and print the first 6 elements ### of the Fibonacci series fibonacci <- numeric(6) fibonacci[1] <- 1 cat("The 1st Fibonacci number is: ", fibonacci[1], "\n") fibonacci[2] <- 1 cat("The 2nd Fibonacci number is: ", fibonacci[2], "\n") for (i in 3:6){ fibonacci[i] <- fibonacci[i - 1] + fibonacci[i - 2] cat("The ",i,"th Fibonacci number is: ", fibonacci[i], "\n") } ### while ### Toss a die until a 6 is obtained toss <- ceiling(6*runif(1)) # ceiling function count <- 1 while (toss != 6){ toss <- ceiling(6*runif(1)) cat('Toss ', count, ' was a ', toss, '\n') count <- count + 1 } cat('There were ', count-2, ' tosses before the first 6\n') ### Functions square <- function(x) x * x square(10) square(1:10) sumsq <- function(x) sum(square(x)) sumsq(1:10) hypot <- function(a, b) { sqrt(a^2 + b^2) } hypot(3, 4) sumsq <- function(x, about = 0){ sum((x - about)^2) } sumsq(1:10) sumsq(1:10, mean(1:10)) sumsq(1:10, about=mean(1:10)) sumsq(about=mean(1:10), 1:10) sumsq <- function(x, about = 0){ sum((x - about)^2) } sumsq(1:10, mean(1:10)) sumsq(1:10, about=mean(1:10)) sumsq(1:10, a=mean(1:10)) sumsq(x=1:10, mean(1:10)) sumsq(mean(1:10), x=1:10) ### ### Dates and Times ### Sys.Date() attributes(Sys.Date()) ### Read data in datetime <- read.csv("DateTimeData.csv") str(datetime) datetime <- read.csv("DateTimeData.csv",as.is=c(2,3)) str(datetime) head(datetime) ### Make the activity date be of class "Date" unclass(datetime$ActDate[1:10]) datetime$ActDate <- as.Date(datetime$ActDate,"%d-%b-%y") ?strftime str(datetime) attach(datetime) ActDate[1:10] attributes(ActDate) unclass(ActDate[1:10]) format.Date(ActDate[1:10],"%d/%m/%Y") format(ActDate[1:10],"%d/%b/%Y") ### What is origin? structure(0,class="Date") ?structure structure(-1,class="Date") unclass(as.Date("1969-01-01")) ### Operate on Date objects ActDate[51:60]-ActDate[1:10] difftime(ActDate[51:60],ActDate[1:10]) difftime(ActDate[51:60],ActDate[1:10],unit="hours") difftime(ActDate[51:60],ActDate[1:10],unit="secs") mean(ActDate[51:60]-ActDate[1:10]) ActDate[1] ActDate[1]+5 ActDate[1]+floor(5) floor(5) ### Use the class to create useful labels on plots plot(ActDate,ID) plot(ActDate[20*(1:20)],ID[20*(1:20)]) ### Create time and date object using strptime testTime <- strptime(Time,format="%H:%M") ### Creates POSIXlt object attributes(testTime) testTime[1:10] strftime(testTime[1:10],format="%a") strftime(testTime[1:10],format="%I:%M %p") ### Look at Date objects ActDate[1:2] as.POSIXct(ActDate[1:2]) attributes(as.POSIXct(ActDate[1:2])) str(as.POSIXct(ActDate[1:2])) strftime(as.POSIXct(ActDate[1:2]),format="%d-%b-%y") strftime(as.POSIXlt(ActDate[1:2]),format="%d-%b-%y") strftime(as.POSIXlt(ActDate[1:2]),tz="GMT") strftime(as.POSIXlt(ActDate[1:2]),format="%d-%b-%y %H:%M",tz="GMT") strftime(as.POSIXlt(ActDate[1:2]),format="%d-%b-%y %H:%M, %Z",tz="NZST12NZDT") strftime(as.POSIXlt(ActDate[1:2]),format="%d-%b-%y %H:%M, %z",tz="GMT") ### Create POSIX date and time objects detach(datetime) ### Look at raw data datetime$ActDate[1:10] unclass(datetime$Time)[1:10] datetime$DateTime <- as.POSIXct(paste(datetime$ActDate,datetime$Time)) ### Data assumed to be from time zone GMT, printed in NZST datetime$DateTime[1:10] ### ### Miscellanea ### ### Scope f <- function(x){ y <- 2*x print(x) print(y) print(z) } cube <- function(n){ sq <- function() n*n n*sq() } cube(2) ### Add-on packages .libPaths() ### Mathematical expressions ?plotmath par(ask=TRUE) example(plotmath) demo(plotmath) ### optimisation par(ask=TRUE) ?optim example(optim) ### ### Extended example ### library(Devore5) data(xmp04.30) str(xmp04.30) sum(dweibull(xmp04.30$lifetime, shape=1, scale=1000, log=TRUE)) llfunc <- function(x){ -sum(dweibull(xmp04.30$lifetime, shape=x[1], scale=x[2], log=TRUE)) } mle <- nlm(llfunc, c(shape = 1, scale = 1000), hessian = TRUE) str(mle) solve(mle$hessian) # approximate variance-covariance matrix postscript("WeibullDens.eps",height=6,onefile=TRUE) plot(function(x) dweibull(x, shape = 2.15, scale = 1289.34), 0, 3000, col = "red", xlab = "lifetime (hr)", ylab = "density", main = "Weibull density using MLEs from the lifetime data") rug(xmp04.30$lifetime, col = "blue") dev.off() grid <- matrix(0.0, nrow = 101, ncol = 101) scvals <- seq(0.5, 3.5, len = 101) # scale parameter shvals <- seq(500, 2500, len = 101) # shape parameter} for (i in seq(along = scvals)){ for (j in seq(along = shvals)){ grid[i,j] <- llfunc(c(scvals[i], shvals[j])) } } postscript("WeibullContours.eps",height=7,onefile=TRUE) par(mfrow=c(1,2)) contour(scvals, shvals, grid, levels = 77:85) points(mle$estimate[1], mle$estimate[2], pch = "+", cex = 1.5) title(xlab = expression(alpha), ylab = expression(beta)) contour(scvals, shvals, grid, levels = mle$min + qchisq(c(0.5,0.8,0.9,0.95,0.99), 2), labels = paste(c(50,80,90,95,99), "%", sep = "")) title(xlab = expression(alpha), ylab = expression(beta)) dev.off() ### lattice examples ### Change current background colour to transparent ###lset(theme = col.whitebg()) library(lattice) data(quakes) ?quakes str(quakes) Depth <- equal.count(quakes$depth, number=8, overlap=.1) postscript("quakesGrey.eps",onefile=FALSE) xyplot(lat ~ long | Depth, data = quakes) dev.off() ### Build up example library(foreign) data.restore("visualizing.data") xyplot(NOx ~ E, data=ethanol) xyplot(NOx ~ E|C, data=ethanol) xyplot(NOx ~ E|C, data=ethanol,layout=c(2,3,1)) xyplot(NOx ~ E|C, data=ethanol,layout=c(2,2,1)) EE <- equal.count(ethanol$E, number = 9,overlap = 1/4) str(EE) EE plot(EE,xlab="Range of E",ylab="Interval") plot(EE) xyplot(NOx ~ C|EE, data=ethanol,aspect=2) xyplot(NOx ~ C|EE, data=ethanol, subset=C > 8) ### Barley data dotplot(variety ~ yield| year*site, data = barley, xlab = "Barley Yield (bushels/acre)", par.strip.text=list(cex=0.5), scales=list(cex=0.5,cex.lab=0.5, cex.main=0.5,cex.axis=0.5, cex.sub=0.5)) dotplot(variety ~ yield| year*site, data = barley, xlab = "Barley Yield (bushels/acre)",cex=0.5,layout=c(2,2,3)) ### Combining panels xyplot(NOx ~E|C, data = ethanol, panel = function(x,y){ panel.xyplot(x,y) panel.loess(x,y) } ) ### Printing for (i in 1:5){ i } for (i in 1:5){ print(i) } par(mfrow=c(2,1)) for (i in 1:2){ hist(rnorm(100)) } for (i in 1:2){ histogram(rnorm(100)) } for (i in 1:2){ print(histogram(rnorm(100))) } ### ### Object oriented programming ### ### S3 class system x <- rep(0:1, c(10,20)) x class(x) summary(x) y <- as.factor(x) class(y) summary(y) ### Problems with S3 nofactor <- "This is not a factor" class(nofactor) class(nofactor) <- "factor" nofactor <- factor(nofactor) class(nofactor) summary(nofactor) class(nofactor) <- "lm" class(nofactor) summary(nofactor) t methods("t") methods(class="test") library(tools) .makeS3MethodsStopList() cbind(1:3, 4:6) is.vector(1:3) is.matrix(cbind(1:3, 4:6)) is.vector(1:3) is.integer(1:3) class(1:3) class(cbind(1:3, 4:6)) class("abc") print(1:3) print(matrix(1:6, nrow=3)) print("abc") library(MASS) data(birthwt) fit <- lm(bwt ~ ., data=birthwt) class(fit) print(fit) print.lm(fit) x <- 1:10 y <- rnorm(10) plot(x, y) plot(y ~ x) plot(lm(y ~ x)) plot(sin) size <- function(x, ...) UseMethod("size") size.default <- function(x) length(x) size.data.frame <- function(x) dim(x) size.list <- function(x) lapply(x, size) size(1:3) size(c("a","a","a","b","b","c")) size(data.frame(1:3, c("a","b","c"))) size(list(x=1:3, y=c("a","b","c"), z= list(m=1:2, n=letters))) is.String <- function(x) class(x) == "String" String <- function(x) { if( !is.String(x) ) { x <- as.character(x) class(x) <- "String" } x } x <- "This is a sentence" as.character(x) class(x) a <- String(x) a class(a) attr(String(a), "class") is.String(a) print.String <- function(x, ...) cat(x, "\n") length.String <- function(x, ...) nchar(x) summary.String <- function(x, ...) table(strsplit(x, "")) print(a) a length(a) summary(a) slice <- function(x, index) UseMethod("slice") slice.String <- function(x, index) { a <- unlist(strsplit(x, ""))[index] String(paste(a, sep="", collapse="")) } slice(a, 1:5) "[.String" <- function(x, index) slice(x, index) "+.String" <- function(x, y) String(paste(x, String(y), sep="")) a[1] a[2:5] a + ", and second" + ", and third" setClass("String", representation(string="character")) setClass("myclass", representation(a="character", b="numeric")) String <- function(x) new("String", string=as.character(x)) String("This is a sentence") a <- String("This is a sentence") a setMethod("show", "String", function(object) cat(object@string, "\n")) show(a) show("This is a sentence") setMethod("length", "String", function(x) nchar(x@string)) setMethod("summary", "String", function(object, ...) table(strsplit(object@string, ""))) methods(summary) args(summary) class(a) summary(a) setGeneric("slice", function(x, index) standardGeneric("slice")) setMethod("slice", "String", function(x, index) { a <- unlist(strsplit(x@string, ""))[index] String(paste(a, sep="", collapse="")) } ) slice(a, 1:6) methods(slice) setMethod("[", "String", function(x, i, j, ..., drop) slice(x, i) ) a[1:6] methods("[")