#Suppose you have your data in X and Y,
#Y being the vector of dependent variables and X being the vector (or matrix)
#of independent variables.
#Y=scan("UBN_precip.txt")
#X=matrix(scan("UBN_predictors.txt"),ncol=7,byrow=T)

# Linear regression on a single independent variable X..
test=matrix(scan("Avg-SpringFlows-April1SWE-49-04.txt", skip = 1), ncol=2, byrow=T)
Y=test[,1] #Spring Flows
X=test[,2] #April 1 SWE
plot(X,Y)
cor(X,Y)
n=length(Y) #number of data points..
X1=cbind(rep(1,n),X) #add the first column of 1s
betas=solve(t(X1) %*% X1)%*%t(X1)%*%Y #betas = (X^T X)^-1 X^T Y
#solve gives the inverse of a matrix
#t(X) gives the transpose of a matrix
#%*% is the operator for matrix multiplication.
#you can check the betas obtained as above against lsfit..
zz=lsfit(X,Y)
abline(zz)
ls.print(zz)
#zz$coef should be exactly equal to betas

# Add 100(1-alpha)% confidence intervals - from Chapter 11 slide 41, or p. 363 in Walpole
S_xx = sum((X-mean(X))^2) # see Chapter 11, ppt slide 19 or p. 362 in Walpole
SS_E = sum((zz$resid)^2) # sum of the squared residuals - see Chapter 11, ppt slide 25
errvar_est = SS_E/(n-2) # the "unbiased estimate" of the error variance, calculated from SS_E - see Chapter 11, ppt slide 26
#qt(0.975,.) is the upper 2.5% t quantile; qt(0.025,.) is the lower (negative) one
hi_conf = zz$coef[2] + qt(0.975, (n-2))*sqrt(errvar_est/S_xx)
lo_conf = zz$coef[2] + qt(0.025, (n-2))*sqrt(errvar_est/S_xx)

# Alternative method to get confidence intervals that also works in higher-order models (matrices)
#to get the variances of the betas
errvar=var(zz$resid) #error variance - estimated above as errvar_est; here computed directly
#(note var() divides by n-1, so this differs slightly from SS_E/(n-2))
varbetas = solve(t(X1) %*% X1)*errvar #this gives a ((k+1) x (k+1)) matrix of
#variance-covariance..
#the diagonal elements are the variances of the betas.
#to get the confidence intervals for the betas
k=1 #k is the number of variables in the X matrix
dofs = n-(k+1) #degrees of freedom: n minus the number of fitted coefficients
#95% confidence interval for the slope
betas[2] + qt(0.975, dofs)*sqrt(varbetas[2,2])
betas[2] + qt(0.025, dofs)*sqrt(varbetas[2,2])
#You can also use this to get the t-statistic for the betas
#and test their significance

#confidence interval of the regression line..
#for a given point i of the original data, the 95% confidence interval is
i = 1 #for example, the first data point
sum(X1[i,] * betas) + qt(0.975,dofs)*sqrt(t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,]*errvar)
sum(X1[i,] * betas) + qt(0.025,dofs)*sqrt(t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,]*errvar)
#For a new point xo it is a prediction interval..
sum(X1[i,] * betas) + qt(0.975,dofs)*sqrt((1+t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,])*errvar)
sum(X1[i,] * betas) + qt(0.025,dofs)*sqrt((1+t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,])*errvar)

# to get confidence and prediction intervals you can also use lm
# lm and lsfit give the same answers,
# but lm is easier to work with for prediction etc..
zz=lm(Y ~ X)
predict.lm(zz,interval="confidence")
predict.lm(zz,interval="prediction")
#this gives the intervals at the original data points that went into
#the fitting of the model.
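# As a quick cross-check (a minimal sketch using built-in helpers on the lm fit
# above): confint() returns the 95% confidence intervals for the betas, vcov()
# the variance-covariance matrix of the betas (compare with varbetas - vcov
# uses the unbiased estimate SS_E/(n-2)), and summary() the t-statistics..
confint(zz, level = 0.95)
vcov(zz)
summary(zz)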
# Say your points of interest are in a vector xo..
Xtrue=X #save the original data..
#replace X with xo - here a grid spanning the observed range
lo_xo = min(Xtrue) - sd(Xtrue)
hi_xo = max(Xtrue) + sd(Xtrue)
X = seq(lo_xo, hi_xo, length=100)
zz_conf = predict.lm(zz,data.frame(X),interval="confidence")
zz_pred = predict.lm(zz,data.frame(X),interval="prediction")
plot(X, zz_pred[,1], type = "l", ylim = range(zz_pred, Y)) # linear regression fit (ylim wide enough to show all bands)
points(Xtrue, Y, col = "red") # observed data
lines(X, zz_pred[,2], col = "blue") # prediction interval
lines(X, zz_pred[,3], col = "blue")
lines(X, zz_conf[,2], col = "green") # confidence interval
lines(X, zz_conf[,3], col = "green")
#predict.lm requires the new data to carry the same name as the variable that went into
#the lm(..) command - here it is X
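# A minimal alternative sketch: rather than overwriting X, pass a named newdata
# frame whose column matches the predictor name in lm(Y ~ X); the grid variable
# xo_grid below is just an illustrative name..
X = Xtrue #restore the original predictor first
xo_grid = data.frame(X = seq(lo_xo, hi_xo, length=100))
predict(zz, newdata = xo_grid, interval = "confidence")
predict(zz, newdata = xo_grid, interval = "prediction")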