{
  # Linear-regression diagnostics and cross-validation.
  #
  # Assumes the workspace already contains:
  #   X — vector/matrix of independent variables
  #   Y — vector of the dependent variable
  X <- as.matrix(X)
  nvar <- ncol(X)

  # Fit a linear regression ----
  zz <- lsfit(X, Y)

  # residuals = Y - Yestimate  =>  Yestimate = Y - residuals
  x11 <- residuals(zz)
  yest <- Y - x11

  # Model diagnostics ----
  # Histogram of the residuals with the fitted normal pdf overlaid, to see
  # how well the residuals match a normal distribution (a KS test or a
  # qq-plot would formalize this).
  hist(x11, probability = TRUE)
  library(sm)  # for sm.density() below
  lines(sort(x11), dnorm(sort(x11), mean = 0, sd = sd(x11)))
  sm.density(x11, add = TRUE, col = "red")

  # Normal qq-plot of the residuals
  qqnorm(x11)
  qqline(x11)

  # Residuals against each independent variable
  for (i in seq_len(nvar)) {
    plot(X[, i], x11, xlab = "X", ylab = "residuals")
  }

  # Y estimates against the residuals
  plot(yest, x11, xlab = "estimate of Y", ylab = "residuals")

  # Autocorrelation function of the residuals — checks whether the
  # residuals are related to each other.
  z1 <- acf(x11)

  # ----------------------------------------
  # Leave-one-out cross-validated estimates ----
  n <- length(Y)
  yest <- numeric(n)  # preallocate (original grew from 1:n)
  index <- seq_len(n)
  for (i in index) {
    keep <- index[index != i]
    # drop = FALSE keeps X a matrix even when there is one predictor
    zz <- lsfit(X[keep, , drop = FALSE], Y[keep])
    # Estimate at the dropped point; the leading 1 pairs with the
    # intercept coefficient that lsfit() returns first.
    yest[i] <- sum(zz$coef * c(1, X[i, ]))
  }

  # Plot the cross-validated estimates against the truth.
  plot(Y, yest,
       xlim = range(Y, yest), ylim = range(Y, yest),
       xlab = "True value", ylab = "x-validated estimate")
  abline(0, 1)  # 45-degree reference line (original drew lines(Y, Y))

  ### Drop 15% of the points, fit the model, predict the dropped points ----
  # NOTE: library(arules) removed — setdiff() is base R.
  nsim <- 500
  rmseskill <- numeric(nsim)
  corskill <- numeric(nsim)
  N <- length(Y)
  N15 <- round(0.15 * N)  # number of points dropped per simulation
  index <- seq_len(N)
  for (i in seq_len(nsim)) {
    drop_idx <- sample(index, N15)
    keep <- setdiff(index, drop_idx)
    zz <- lsfit(X[keep, , drop = FALSE], Y[keep])
    # Predict at the dropped rows: prepend an intercept column and apply
    # the lsfit coefficients. (The original fit lm(y ~ x) with a matrix x
    # and called predict(zz, newdata = data.frame(x)); the data.frame
    # column names x.1, x.2, ... do not match the model term "x", so
    # predict() silently returned the in-sample fitted values rather than
    # predictions at the dropped rows.)
    yhat <- drop(cbind(1, X[drop_idx, , drop = FALSE]) %*% zz$coef)
    # RMSE skill (the original stored the MSE despite the name; sqrt added)
    rmseskill[i] <- sqrt(mean((Y[drop_idx] - yhat)^2))
    ## alternative, normalized:
    ## rmseskill[i] <- mean(((Y[drop_idx] - yhat) / sd(Y[drop_idx]))^2)
    corskill[i] <- cor(Y[drop_idx], yhat)
  }
}