{ #Commands to to subset selection using brute force.. #Suppose you have your data in X and Y #Y being the vector dependent variables and X being the matrix containing the #independent variables. #Y=scan("UBN_precip.txt") #X=matrix(scan("UBN_predictors.txt"),ncol=7,byrow=T) # Linear regression of a single independent variable X.. n=length(Y) # of data points.. # to select the best subset or 'best' model, fit models with all possible # combinations, compute a quantity PRESS or Cp and, select the model # with the minimum PRESS nd = length(X[1,]) # numnber of variables library(MPV) # if you have nd = 3 variables then you have # 3 - single variable models # 3 - two variable models # 1 - three varibale model # PRESS xpress = 1:7 # MSE xmse = 1:7 # Single variable models imod=0 for(i in 1:3){ XX = X[,i] YY = Y zz = lm(YY ~ XX) xhat = hatvalues(zz) # This gives the diagonal elements of the Hat matrix # PRESS xpp = sum((residuals(zz)/(1 - xhat))^2) # or you can compute the PRESS using # PRESS command.. # PRESS(zz) compare this with xpp above.. imod = imod+1 xpress[imod]=PRESS(zz) xmse[imod] = residuals(zz)*residuals(zz) / n } #Models with 2 variables.. # Create all 2 variable combinations.. XX1 = X[,1:2] XX2 = cbind(X[,1],X[,3]) XX3 = cbind(X[,2],X[,3]) #fit the models zz = lm(YY ~ XX1) imod = imod+1 xpress[imod]=PRESS(zz) xmse[imod] = residuals(zz)*residuals(zz) / n zz = lm(YY ~ XX2) imod = imod+1 xpress[imod]=PRESS(zz) xmse[imod] = residuals(zz)*residuals(zz) / n zz = lm(YY ~ XX3) imod = imod+1 xpress[imod]=PRESS(zz) xmse[imod] = residuals(zz)*residuals(zz) / n # Models with 3 variables. zz=lm(Y ~ X) imod=imod+1 xpress[imod]=PRESS(zz) xmse[imod] = residuals(zz)*residuals(zz) / n # order the PRESS values zz = order(xpress) zz[1] # is the model with the least PRESS - i.e. the best model. # order the MSE values to get the best model in terms of PRESS zz = order(xmse) zz[1] # is the model with the least MSE.