{
  # Linear-regression diagnostics and cross-validation.
  #
  # Assumes the workspace already contains:
  #   X — vector/matrix of independent variables
  #   Y — vector of the dependent variable
  X <- as.matrix(X)
  nvar <- ncol(X)

  # Fit a linear regression ----
  zz <- lsfit(X, Y)

  # residuals = Y - Yestimate  =>  Yestimate = Y - residuals
  x11 <- residuals(zz)
  yest <- Y - x11

  # Model diagnostics ----
  # Histogram of the residuals with the fitted normal pdf overlaid, to see
  # how well the residuals match a normal distribution (a KS test or a
  # qq-plot would formalize this).
  hist(x11, probability = TRUE)
  library(sm)  # for sm.density() below
  lines(sort(x11), dnorm(sort(x11), mean = 0, sd = sd(x11)))
  sm.density(x11, add = TRUE, col = "red")

  # Normal qq-plot of the residuals
  qqnorm(x11)
  qqline(x11)

  # Residuals against each independent variable
  for (i in seq_len(nvar)) {
    plot(X[, i], x11, xlab = "X", ylab = "residuals")
  }

  # Y estimates against the residuals
  plot(yest, x11, xlab = "estimate of Y", ylab = "residuals")

  # Autocorrelation function of the residuals — checks whether the
  # residuals are related to each other.
  z1 <- acf(x11)

  # ----------------------------------------
  # Leave-one-out cross-validated estimates ----
  n <- length(Y)
  yest <- numeric(n)  # preallocate (original grew from 1:n)
  index <- seq_len(n)
  for (i in index) {
    keep <- index[index != i]
    # drop = FALSE keeps X a matrix even when there is one predictor
    zz <- lsfit(X[keep, , drop = FALSE], Y[keep])
    # Estimate at the dropped point; the leading 1 pairs with the
    # intercept coefficient that lsfit() returns first.
    yest[i] <- sum(zz$coef * c(1, X[i, ]))
  }

  # Plot the cross-validated estimates against the truth.
  plot(Y, yest,
       xlim = range(Y, yest), ylim = range(Y, yest),
       xlab = "True value", ylab = "x-validated estimate")
  abline(0, 1)  # 45-degree reference line (original drew lines(Y, Y))

  ### Drop 15% of the points, fit the model, predict the dropped points ----
  # NOTE: library(arules) removed — setdiff() is base R.
  nsim <- 500
  rmseskill <- numeric(nsim)
  corskill <- numeric(nsim)
  N <- length(Y)
  N15 <- round(0.15 * N)  # number of points dropped per simulation
  index <- seq_len(N)
  for (i in seq_len(nsim)) {
    drop_idx <- sample(index, N15)
    keep <- setdiff(index, drop_idx)
    zz <- lsfit(X[keep, , drop = FALSE], Y[keep])
    # Predict at the dropped rows: prepend an intercept column and apply
    # the lsfit coefficients. (The original fit lm(y ~ x) with a matrix x
    # and called predict(zz, newdata = data.frame(x)); the data.frame
    # column names x.1, x.2, ... do not match the model term "x", so
    # predict() silently returned the in-sample fitted values rather than
    # predictions at the dropped rows.)
    yhat <- drop(cbind(1, X[drop_idx, , drop = FALSE]) %*% zz$coef)
    # RMSE skill (the original stored the MSE despite the name; sqrt added)
    rmseskill[i] <- sqrt(mean((Y[drop_idx] - yhat)^2))
    ## alternative, normalized:
    ## rmseskill[i] <- mean(((Y[drop_idx] - yhat) / sd(Y[drop_idx]))^2)
    corskill[i] <- cor(Y[drop_idx], yhat)
  }
}