# file of commands for lecture 25 drug.df<-read.table(file.choose(), header=T) # file drug.txt in the 330 data drug.df<-data.frame(drug.df[,-4], IVHX=factor(drug.df$IVHX)) attach(drug.df) par(mfrow=c(1,2)) plot(AGE,BECK,type="n", cex=1.2, xlab="Age",ylab="BECH Score") text(AGE,BECK,1:39, col=ifelse(DFREE==1, "red","blue"), cex=0.9) text(45,45,"Red: Drug Free", col="red", adj=0, cex=0.9) text(45,50,"Blue: Relapse",col="blue",adj=0, cex=0.9) plot(AGE,NDRUGTX,type="n", cex=1.2, xlab="Age",ylab="Number of Prior Treatments") text(AGE,NDRUGTX,1:39, col=ifelse(DFREE==1, "red","blue"), cex=0.9) text(45,35,"Red: Drug Free", col="red", adj=0, cex=0.9) text(45,40,"Blue: Relapse",col="blue",adj=0, cex=0.9) est.prob<- table(DFREE,IVHX)[2,]/table(IVHX) plot(IVHX,est.prob) est.prob<- table(DFREE,RACE)[2,]/table(RACE) plot(unique(RACE),est.prob) est.prob<- table(DFREE,TREAT)[2,]/table(TREAT) plot(unique(TREAT),est.prob) est.prob<- table(DFREE,SITE)[2,]/table(SITE) plot(unique(SITE),est.prob) x<-IVHX marginal.plot<-function(x, fac.names){ est.prob<- table(DFREE,x)[2,]/table(x) plot(sort(unique(x)),est.prob, axes=F, xlab=deparse(substitute(x)), ylab="Estimated probability of being drug free", pch=19, col="red", cex=1.2, type="l", lwd=2) points(sort(unique(x)),est.prob, col="red", cex=1.2,pch=19) axis(side=1, at =sort(unique(x)), labels = fac.names, tick = TRUE) axis(2) box() } par(mfrow=c(1,3)) marginal.plot(IVHX,c("Never","Previous","Recent") ) marginal.plot(TREAT, c("Short","Long")) marginal.plot(SITE,c("A","B")) drug.glm<-glm(DFREE~ . , family=binomial, data=drug.df) summary(drug.glm) HLtest(drug.glm) anova(drug.glm) null.glm<-glm(DFREE~ 1 , family=binomial, data=drug.df) step(null.glm, formula(drug.glm),type="step") sub.glm<-glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=binomial, data=drug.df) sub2.glm<-glm(DFREE ~ NDRUGTX*IVHX + AGE*IVHX + AGE*TREAT + NDRUGTX*TREAT , family=binomial, data=drug.df) anova(sub.glm, sub2.glm) par(mfrow=c(1,2)) sub.gam<-gam(DFREE ~ s(NDRUGTX) + IVHX + s(AGE) + TREAT , family=binomial, data=drug.df) plot(sub.gam) subq.glm<-glm(DFREE ~ poly(NDRUGTX,2) + IVHX + AGE + TREAT , family=binomial, data=drug.df) c1<-coef(glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=binomial, data=drug.df)) c2<-coef(glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=binomial, subset=(1:575)[-7],data=drug.df)) c3<-coef(glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=binomial, subset=(1:575)[-85],data=drug.df)) c4<-coef(glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=binomial, subset=(1:575)[-471],data=drug.df)) c5<-coef(glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=binomial, subset=(1:575)[-c(7,85,471)],data=drug.df)) result<-cbind(c1,c2,c3,c4,c5) dimnames(result)<-list(names(c1), c("None", "7", "85", "471", "All")) round(result, 3) qsub.glm<-glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , family=quasibinomial, data=drug.df) cross.val.glm(DFREE ~ NDRUGTX + IVHX + AGE + TREAT , drug.df) cross.val.glm<-function(formula, data, nfold=10, nrep=20){ # formula: the model formula # data: the data frame containing the data stuff.glm<-glm(formula, family = binomial, data = data) n<-length(residuals(stuff.glm)) m<-n%/%nfold Spec<-matrix(0,nrep,nfold) Sense<-matrix(0,nrep,nfold) Tot<-matrix(0,nrep,nfold) for(i in 1:nrep){ rand.order<-order(runif(n)) newdata<-data[rand.order,] sample<-1:m for(j in 1:nfold){ fit<-glm(formula, family =binomial, data = newdata[-sample,]) y.pred<-(predict(fit, newdata[sample,],type="response")>0.5)*1 y.act<-(stuff.glm$y[rand.order])[sample] Spec[i,j]<-sum(y.act*y.pred)/sum(y.act) Sense[i,j]<-sum((1-y.pred)*(1-y.act))/sum(1-y.act) Tot[i,j]<-(sum((1-y.pred)*(1-y.act))+sum(y.act*y.pred))/m sample<-sample + m } } cat("Mean Specificity = ",mean(Spec, na.rm=T), "\n") cat("Mean Sensitivity = ",mean(Sense, na.rm=T), "\n") cat("Mean Correctly classified = ",mean(Tot, na.rm=T), "\n") invisible(list(Specificity=Spec, Sensitivity=Sense, Correct=Tot)) } table(cut(predict(sub.glm, type="response"), (0:10)/10))