# Commands for Multiple Regression Diagnostics.. # You can use the same for Simple Regression as well.. #Suppose the best set of predictor variables or independent variables are in matrix X # You have this from the subset selection procedure.. # The dependent variable is in vector Y bestmodel = lsfit(X,Y) #Linear regress fit ##### quantities for ANOVA SST = sum((Y - mean(Y))^2) Yhat = Y - bestmodel$res #(Y - Yhat = residuals) SSR = sum((Yhat - mean(Y))^2) SSE = sum((bestmodel$res)^2) MSR = SSR / ncol(X) MSE = SSE/(N - length(bestmodel$coef)) N = length(Y) XX = cbind(rep(1,N), X) ##########################3 Hat matrix and influence.. # Compute the Hat matrix hatm = XX %*% solve(t(XX) %*% XX) %*% t(XX) # You can also get the hat matrix from... library(MPV) zz = lm(Y ~ X) hatm = hatvalues(zz) # provides the diagonal elements of the hat matrix #studentized residuals - ri - equation 12-42 #ri = bestmodel$res/sqrt((1 - diag(hatm)) * MSE) ri = bestmodel$res/sqrt((1 - (hatm)) * MSE) #Compute Cook's Distance Equation 12-44 #Di = ri*ri * diag(hatm) / ((1-diag(hatm)) * length(bestmodel$coef)) Di = ri*ri * (hatm) / ((1-(hatm)) * length(bestmodel$coef)) #Plot the Di plot(1:N, Di, xlab="Obs", ylab="Cook's Distance") #If Dis are greater than 1 then there are points that have undue influence on the fit. ############## #### Error/Residual Diagnostics #get the residuals of the model.. x11=residuals(bestmodel) # residuals = Y - Yestimate ==> Yestimate = Y - residuals yest=Y-x11 plot(Y, yest) abline(a=0, b=1) # model diagnostics.. # plot the histogram of the residuals and add the pdf. This is to # see how good the residuals fit a normal distribution.. # you can do a KS test on this, or a qqplot..) par(mfrow=c(2,3)) hist(x11,probability=T) library(sm) sm.density(x11,add=T) qqnorm(x11) qqline(x11) # Create a qq line #plot the autocorrelation function - to see if the residuals are related # to each other.. IID assumption z1=acf(x11) # plot the Y estimates agains the residuals.. # for constant variance or homoskedasticity.. plot(yest,x11,xlab="estiimate of Y", ylab="residuals") # if there is a pattern then transformation is requred or iterated leas # squares.. #plot the residuals against each X variable.. # plot nd = ncol(X) for(i in 1:nd){ plot(X,x11,xlab="X",ylab="residuals") }