# sample code for bootstrapping

# Location of the housing data: a comma-separated file that is read with
# header = FALSE, i.e. its first line is treated as data, not column names.
URL <- "https://www.stat.auckland.ac.nz/~lee/760/housing.csv"

# Read the file and label the columns in one step. MEDV is the response
# used by the model formula further down; the other columns are predictors.
boston.df <- setNames(
  read.table(URL, header = FALSE, sep = ","),
  c(
    "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
    "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"
  )
)

library(randomForest)

# Bootstrap estimate of prediction error for a random forest
# (mtry = 5, ntree = 60) that models MEDV from all other columns.
#
# The final value is: error of the forest fitted to the full data, plus the
# average over B bootstrap resamples of
#   (error of the resample's forest on the original data)
#   - (error of the resample's forest on its own resample).
#
# NOTE(review): per the randomForest documentation, the `$predicted`
# component holds out-of-bag predictions, so the two "own data" errors below
# are OOB errors rather than resubstitution errors -- confirm this is the
# intended form of the correction.
formula <- MEDV ~ .
n <- nrow(boston.df)
B <- 50

# Forest fitted once to the complete data set.
fit.data <- randomForest(formula, data = boston.df, mtry = 5, ntree = 60)

# opt[b] receives the error difference computed from the b-th resample.
opt <- numeric(B)

for (b in seq_len(B)) {
  # Resample n rows with replacement and refit the forest on the resample.
  bdata <- boston.df[sample(n, replace = TRUE), ]
  fit.bs <- randomForest(formula, data = bdata, mtry = 5, ntree = 60)

  # Mean squared error of the resample's forest on the original data ...
  err.orig <- mean((boston.df$MEDV - predict(fit.bs, newdata = boston.df))^2)
  # ... and on its own resample, using the stored `$predicted` values.
  err.boot <- mean((bdata$MEDV - fit.bs$predicted)^2)

  opt[b] <- err.orig - err.boot
}

# Full-data error plus the average correction; printed as the result.
mean((boston.df$MEDV - fit.data$predicted)^2) + mean(opt)


# Sample code for repeated K-fold cross-validation (with mtry = 5).
#
# For each of `Repeats` independent random shuffles of boston.df, the rows
# are split into `nfold` consecutive folds. Each fold is held out once, a
# random forest is fitted to the remaining rows, and the mean squared
# prediction error on the held-out fold is recorded. CV[k] is the average
# fold error for repeat k; the final printed value averages over repeats.
#
# Every row belongs to exactly one test fold: folds 1..(nfold - 1) hold
# m = n %/% nfold rows each and the last fold additionally takes the
# n %% nfold leftover rows. (Switching to the remainder only after the last
# fold had already been scored would leave those leftover rows untested.)

nfold <- 5
Repeats <- 5
n <- nrow(boston.df)
stopifnot(nfold >= 2, nfold <= n)  # need at least one row per fold
m <- n %/% nfold

# fold.id[j] is the fold of the j-th row of the shuffled data; positions past
# nfold * m are clamped into the last fold.
fold.id <- pmin((seq_len(n) - 1) %/% m + 1, nfold)

CV <- numeric(Repeats)
pred.error <- numeric(nfold)

for (k in seq_len(Repeats)) {
  # Random permutation of the rows for this repeat.
  rand.order <- order(runif(n))
  shuffled <- boston.df[rand.order, ]

  for (i in seq_len(nfold)) {
    in.test <- fold.id == i
    train <- shuffled[!in.test, ]
    test <- shuffled[in.test, ]

    fit <- randomForest(formula, data = train, mtry = 5, ntree = 60)
    my.predict <- predict(fit, newdata = test)
    pred.error[i] <- mean((test$MEDV - my.predict)^2)
  }

  CV[k] <- mean(pred.error)
}

# Cross-validated estimate of prediction error, averaged over repeats.
mean(CV)