## Regression-tree and random-forest fits for the precipitation data.
##
## Expects a data frame named `precip_dt` (created elsewhere) with the
## response `Y` and the predictors `X1`, `X2`, `X3`.

library(mclust)
library(tree)
library(randomForest)

#' Fit a regression tree and prune it by cross-validation
#'
#' Grows a tree for `Y ~ X1 + X2 + X3`, runs `cv.tree()` on it, and prunes
#' the tree back to the size with the smallest cross-validated deviance.
#'
#' @param precip_dt Data frame containing the columns `Y`, `X1`, `X2`, `X3`.
#' @param toPlot If `TRUE`, draw the unpruned tree, the cross-validation
#'   curve and the pruned tree.
#' @param title Label appended to the main title of each plot.
#' @return The pruned tree, as returned by `prune.tree()`.
treeFun <- function(precip_dt, toPlot = FALSE, title = "") {
  stopifnot(is.data.frame(precip_dt))
  missing_cols <- setdiff(c("Y", "X1", "X2", "X3"), names(precip_dt))
  if (length(missing_cols) > 0) {
    stop(
      "precip_dt is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Full (unpruned) tree, fitted on the data passed in by the caller rather
  # than on a global object.
  myTree <- tree(Y ~ X1 + X2 + X3, data = precip_dt, model = TRUE)

  # Cross-validate and pick the size with the smallest deviance
  # (which.min returns the first minimum when there are ties).
  cvTree <- cv.tree(myTree)
  optTree <- which.min(cvTree$dev)
  bestTree <- cvTree$size[optTree]

  # Prune the tree to the size chosen by cross-validation.
  pruneTree <- prune.tree(myTree, best = bestTree)

  if (toPlot) {
    # Unpruned tree
    plot(myTree)
    text(myTree, cex = .75)
    title(main = paste("Unpruned Tree for", title))

    # Cross-validation curve
    plot(cvTree$size, cvTree$dev, type = "b",
         main = paste("Cross Validation for", title))

    # Pruned tree; a tree pruned down to a single node has no splits to draw.
    if (inherits(pruneTree, "singlenode")) {
      message("Pruned tree for ", title, " is a single node; plot skipped.")
    } else {
      plot(pruneTree)
      text(pruneTree, cex = .75)
      title(main = paste("Pruned Tree for", title))
    }
  }

  pruneTree
}

## ---- part (a): regression tree --------------------------------------------

# Unpruned tree fitted directly.
tree.precip <- tree(Y ~ X1 + X2 + X3, data = precip_dt)
summary(tree.precip)
plot(tree.precip)

# Cross-validated, pruned tree via treeFun().
# NOTE(review): the label says "Burnt Area" although the data are named
# precipitation -- confirm the intended plot title.
tree_title <- "Burnt Area Tree"
treeFit <- treeFun(precip_dt, toPlot = TRUE, title = tree_title)

# Use the pruned tree to predict the original data.
treePred <- predict(treeFit)
treeResid <- resid(treeFit)
myRange <- range(treePred, precip_dt$Y)

# Observed vs. predicted, with the y = x reference line in red.
plot(precip_dt$Y, treePred,
     xlim = myRange, ylim = myRange,
     xlab = "True value", ylab = "Prediction",
     main = paste("Pred vs True for", tree_title))
lines(precip_dt$Y, precip_dt$Y, col = "red")

## ---- part (b): random forest ----------------------------------------------

# Fit the random forest and plot its error against the number of trees.
forest.precip <- randomForest(Y ~ X1 + X2 + X3, data = precip_dt)
summary(forest.precip)
plot(forest.precip)

# Called without new data; per the randomForest documentation this returns
# the out-of-bag predictions for the training rows.
rforestpred <- predict(forest.precip)

# TODO: compare the predicted values with the observed ones, and apply
# bagging and other tree-based methods in the same way.