# Commands for Prediction from Multiple Regression # Prediction and Confidence Intervals.. # You can use the same for Simple Regression as well.. #Suppose the best set of predictor variables or independent variables are in matrix X # You have this from the subset selection procedure.. # The dependent variable is in vector Y model = lsfit(X,Y) #Linear regress fit ls.print(model) # spits out the F-test, R2, variable significance # F-test provides significance of the model #confidence intervals can be computed in two ways # - using lm and predict.lm and, the other is using lsfit and implementing # the equations.. # Using lm and predeict.lm bestmodelp = lm(Y ~ ., data=data.frame(X)) # If you wish the confidence intervals at all the points of X ypred = predict.lm(bestmodelp, interval=c("confidence")) #ypred$fit gives the Yhat and ypred$lwr and ypred$upr gives the lower and #upper confidence intervals # If you wish to to get the prediction and intervals for new observations # say that they are in a matrix Xp ypred = predict.lm(bestmodelp, interval=c("prediction"), newdata=data.frame(Xp)) ################################################################################## # You get the same thing by implementing the equations and using lsfit.. N = length(Y) XX = cbind(rep(1,N), X) bestmodelp = lsfit(X,Y) #Suppose Xp is the matrix containing the points at which predictions are # required # If it is a single variable then it will be vector Yhat = XX %*% bestmodelp$coef #gives you the prediction.. dof = N - length(bestmodelp$coef) errorvar = sum((bestmodelp$res)^2) / (N - length(bestmodelp$coef)) #Equation 12-16 # Confidence interval and Prediction interval (Equation 12-38 and 12-40) # 95% confidence level Lowconf = 1:N Uppconf = 1:N Lowpred = 1:np Upppred = 1:np for(i in 1:N){ xo = XX[i,] # Lower confidence interval Lowconf[i] = Yhat[i] - abs(qt(0.025,dof))*sqrt(errorvar * xo %*% solve(t(XX) %*% XX) %*% xo) #Lower prediction interval Lowpred[i] = Yhat[i] - abs(qt(0.025,dof))*sqrt(errorvar * (1 + xo %*% solve(t(XX) %*% XX) %*% xo)) # Upper Confidence interval Uppconf[i] = Yhat[i] + abs(qt(0.025,dof))*sqrt(errorvar * xo %*% solve(t(XX) %*% XX) %*% xo) #Upper Prediction interval Upppred[i] = Yhat[i] + abs(qt(0.025,dof))*sqrt(errorvar * (1 + xo %*% solve(t(XX) %*% XX) %*% xo)) } ### ypred$fit and Yhat should match, ypred$lwr and Lowconf should match, #Ypred$upr and Uppconf should match # if the predict.lm command has the arguement interval=c("prediction") and # ypred$lwr and Lowpred and ypred$upr and Upppred should match # If you wish to to get the prediction and intervals for new observations # say that they are in a matrix Xp, then create # XX = cbind(rep(1,nrow(Xp)), Xp) # repeat the above commands.. ####################################