#Suppose you have your data in X and Y,
#Y being the vector of dependent variables and X being the vector (or matrix)
#of independent variables.
#Y=scan("UBN_precip.txt")
#X=matrix(scan("UBN_predictors.txt"),ncol=7,byrow=T)

# Linear regression on a single independent variable X..
test=matrix(scan("Avg-SpringFlows-April1SWE-49-04.txt", skip = 1), ncol=2, byrow=T)
Y=test[,1] #Spring Flows
X=test[,2] #April 1 SWE
plot(X,Y)
cor(X,Y)
n=length(Y) #number of data points..
X1=cbind(rep(1,n),X) #add the first column of 1s
betas=solve(t(X1) %*% X1)%*%t(X1)%*%Y #betas = (X^T X)^-1 X^T Y
#solve gives the inverse of a matrix
#t(X) gives the transpose of a matrix
#%*% is the operator for matrix multiplication.
#you can check the betas obtained as above against lsfit..
zz=lsfit(X,Y)
abline(zz)
ls.print(zz)
#zz$coef should be exactly equal to betas

# Add 100(1-alpha)% confidence intervals - from Chapter 11 slide 41, or p. 363 in Walpole
S_xx = sum((X-mean(X))^2) # see Chapter 11, ppt slide 19 or p. 362 in Walpole
SS_E = sum((zz$resid)^2) # sum of the squared residuals - see Chapter 11, ppt slide 25
errvar_est = SS_E/(n-2) # the "unbiased estimate" of the error variance, calculated from SS_E - see Chapter 11, ppt slide 26
#qt(0.975,.) is the upper 2.5% t quantile; qt(0.025,.) is the lower (negative) one
hi_conf = zz$coef[2] + qt(0.975, (n-2))*sqrt(errvar_est/S_xx)
lo_conf = zz$coef[2] + qt(0.025, (n-2))*sqrt(errvar_est/S_xx)

# Alternative method to get confidence intervals that also works in higher-order models (matrices)
#to get the variances of the betas
errvar=var(zz$resid) #error variance - estimated above as errvar_est; here computed directly
#(note var() divides by n-1, so this differs slightly from SS_E/(n-2))
varbetas = solve(t(X1) %*% X1)*errvar #this gives a ((k+1) x (k+1)) matrix of
#variance-covariance..
#the diagonal elements are the variances of the betas.
#to get the confidence intervals for the betas
k=1 #k is the number of variables in the X matrix
dofs = n-(k+1) #degrees of freedom: n minus the number of fitted coefficients
#95% confidence interval for the slope
betas[2] + qt(0.975, dofs)*sqrt(varbetas[2,2])
betas[2] + qt(0.025, dofs)*sqrt(varbetas[2,2])
#You can also use this to get the t-statistic for the betas
#and test their significance

#confidence interval of the regression line..
#for a given point i of the original data, the 95% confidence interval is
i = 1 #for example, the first data point
sum(X1[i,] * betas) + qt(0.975,dofs)*sqrt(t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,]*errvar)
sum(X1[i,] * betas) + qt(0.025,dofs)*sqrt(t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,]*errvar)
#For a new point xo it is a prediction interval..
sum(X1[i,] * betas) + qt(0.975,dofs)*sqrt((1+t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,])*errvar)
sum(X1[i,] * betas) + qt(0.025,dofs)*sqrt((1+t(X1[i,])%*%solve(t(X1)%*%X1)%*%X1[i,])*errvar)

# to get confidence and prediction intervals you can also use lm
# lm and lsfit give the same answers,
# but lm is easier to work with for prediction etc..
zz=lm(Y ~ X)
predict.lm(zz,interval="confidence")
predict.lm(zz,interval="prediction")
#this gives the intervals at the original data points that went into
#the fitting of the model.
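# As a quick cross-check (a minimal sketch using built-in helpers on the lm fit
# above): confint() returns the 95% confidence intervals for the betas, vcov()
# the variance-covariance matrix of the betas (compare with varbetas - vcov
# uses the unbiased estimate SS_E/(n-2)), and summary() the t-statistics..
confint(zz, level = 0.95)
vcov(zz)
summary(zz)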
# Say your points of interest are in a vector xo..
Xtrue=X #save the original data..
#replace X with xo - here a grid spanning the observed range
lo_xo = min(Xtrue) - sd(Xtrue)
hi_xo = max(Xtrue) + sd(Xtrue)
X = seq(lo_xo, hi_xo, length=100)
zz_conf = predict.lm(zz,data.frame(X),interval="confidence")
zz_pred = predict.lm(zz,data.frame(X),interval="prediction")
plot(X, zz_pred[,1], type = "l", ylim = range(zz_pred, Y)) # linear regression fit (ylim wide enough to show all bands)
points(Xtrue, Y, col = "red") # observed data
lines(X, zz_pred[,2], col = "blue") # prediction interval
lines(X, zz_pred[,3], col = "blue")
lines(X, zz_conf[,2], col = "green") # confidence interval
lines(X, zz_conf[,3], col = "green")
#predict.lm requires the new data to carry the same name as the variable that went into
#the lm(..) command - here it is X
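# A minimal alternative sketch: rather than overwriting X, pass a named newdata
# frame whose column matches the predictor name in lm(Y ~ X); the grid variable
# xo_grid below is just an illustrative name..
X = Xtrue #restore the original predictor first
xo_grid = data.frame(X = seq(lo_xo, hi_xo, length=100))
predict(zz, newdata = xo_grid, interval = "confidence")
predict(zz, newdata = xo_grid, interval = "prediction")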