#### Problem 4 - Homework 1 for CVEN6833 - Fitting a local polynomial model for July
#### Author: Alvaro Ossandon
#### CLEAR MEMORY
rm(list=ls())
#### Clear the R console
cat("\f")
#### Prevent Warnings
options(warn=-1)
#data required:
Month=7 # 1: Monthly precipitation of January; 2: Monthly precipitation of July
save=FALSE # True for saving figure as pdf file
#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")
#### create a folder to save the figures
if (Month==1){
diraux=paste(mainDir,"/Montly_Precep_January",sep = "")
}else{
diraux=paste(mainDir,"/Montly_Precep_July",sep = "")
}
setwd(diraux)
subDir="LOC_FIT"
if (file.exists(subDir)){
setwd(file.path(diraux, subDir))
} else {
dir.create(file.path(diraux, subDir))
setwd(file.path(diraux, subDir))
}
## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_monthly_precip_0",Month,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)
Pm = test[,4] #Dependent Variable
X = test[,1:3]# Independent variable.
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip =Pm
# Basic Scatterplot Matrix
if(save==TRUE){
pdf("Simple Scatterplot Matrix.pdf") # save figure
pairs(~lon+lat+elev+precip,data=test,
main="Simple Scatterplot Matrix")
dev.off()
}else{
pairs(~lon+lat+elev+precip,data=test,
main="Simple Scatterplot Matrix")
}

################################################
###### II. Local polynomial method ###########
################################################
N = length(Pm)
combs = leaps(X,Pm, nbest=25) # GEt upto 25 combinations for each
# number of predictors
combos = combs$which
ncombos = length(combos[,1])
gcvs_m=1:ncombos
for(i in 1:ncombos) {
xx = X[,combos[i,]]
xx=as.data.frame(xx)
bestmod=locpoly_fit(Pm, xx, glm=FALSE,plot=FALSE)
gcvs_m[i] = bestmod$call$gcv
}
X=X[,combos[which.min(gcvs_m),]]
bestmod=locpoly_fit(Pm,X, glm=FALSE,plot=FALSE)
bestmod$combo=combos[which.min(gcvs_m),]
summary(bestmod)
## Estimation type: Local Regression
##
## Call:
## locfit(formula = Pm ~ ., data = X, alpha = 0.331390134529148,
## maxk = 10000, deg = 2, kern = "bisq", ev = dat(), gcv = 171.360854513287)
##
## Number of data points: 446
## Independent variables: Lat Long Elev
## Evaluation structure: Data
## Number of evaluation points: 446
## Degree of fit: 2
## Fitted Degrees of Freedom: 32.001
###################################################################
################# L matrix Calculation #############
###################################################################
x=as.matrix(X)
L1 = matrix(0,ncol=N,nrow=N)
for(i in 1:N){L1[i,]=locfit(Pm ~ x,alpha=bestmod$call$alpha,deg=bestmod$call$deg,ev=x[i,],
kern="bisq", geth=1)}
Pest1=L1%*%Pm
#compute the GCV for this alpha..
gcvalpha=(N*sum((Pm-Pest1)^2)) / ((N-sum(diag(L1)))^2)
print(c('alpha, gcv, gcv from gcvplot', bestmod$call$alpha, gcvalpha, bestmod$call$gcv))
## [1] "alpha, gcv, gcv from gcvplot" "0.331390134529148"
## [3] "185.572576544518" "171.360854513287"
bestmod=locpoly_fit(Pm,X, glm=FALSE,plot=TRUE)
bestmod$combo=combos[which.min(gcvs_m),]
Pmhat=predict(bestmod,X,se.fit=T)
# compare the predicted values by model and by L matrix
if(save==TRUE){
pdf("Estimated_Precipitation_Locfit_Vs_LMatrix.pdf", width = 8, height = 6) # save figure
par(mfrow=c(1,1))
lim=range(Pest1,Pmhat$fit)
plot(Pest1,Pmhat$fit,xlab="Estimated Precipitation",ylab="Estimated Precipitation with L matrix",main="Estimated Precipitation vs Estimated Precipitation with L", xlim=lim, ylim=lim)
abline(a=0,b=1)
dev.off()
}else{
par(mfrow=c(1,1))
lim=range(Pest1,Pmhat$fit)
plot(Pest1,Pmhat$fit,xlab="Estimated Precipitation",ylab="Estimated Precipitation with L matrix",main="Estimated Precipitation vs Estimated Precipitation with L", xlim=lim, ylim=lim)
abline(a=0,b=1)
}

#Estimate the value of the function at the observed locations..
if(save==TRUE){
pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat$fit)
plot(Pm,Pmhat$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)
dev.off()
}else{
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat$fit)
plot(Pm,Pmhat$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)
}

###################################################################
######## III. F Test and model diagnostics of your best model
###################################################################
loc_Ftest(Pm,Pmhat,X,bestmod)
## [1] "F-test:"
## [1] "Reject the Null because F(local poly) = 6.33 > 1.48 = F(linear model)."
### MODEL DIAGNOSTICS:
Pmest = Pmhat$fit # model's predicted values of Pm
nvar = 2 # number of variables
mod_diagnostics(Pm, Pmest, nvar,save)

###################################################################
#### IV. Compute cross-validated and fitted estmiates at each ####
#### observation. Plot them against the observed values. ####
###################################################################
#do a x-validated prediction - i.e., drop a point and obtain its estimate using the rest.
zcv=locfit(Pm ~., data=X, alpha=bestmod$call$alpha, deg=bestmod$call$deg, kern="bisq", ev=dat(cv=TRUE), scale=TRUE)
#cross validated estimates..
Pcv = predict(zcv)
# Plot LOOCV against observations
if (save==TRUE){
pdf("LOOCV.pdf", width = 8, height = 6) # save figure
lim = range(Pm, Pcv)
plot(Pm, Pcv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV")
abline(a=0,b=1,col = "red")
dev.off()
}else{
lim = range(Pm, Pcv)
plot(Pm, Pcv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV")
abline(a=0,b=1,col = "red")
}

###############################################################################################
#### V. Drop 10% of obs, fit best model, predict dropped points. Compute RMSE and R2. Boxplot.
## validation nsample times (dropping a new 10% each time), and outputs boxplots of RMSE and R2.
###############################################################################################
Drop_10_pred(bestmod,X,Pm,save)

###################################################################
## VI. Spatially map the model estimates and the standard error ####
###################################################################
# read the topography map
x1 = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
names(x1)=c("Lat","Long","Elev")
ypred <- predict(bestmod,newdata=x1,se.fit=T)
#Quilt_plotting(x1[,2],x1[,1],ypred)
plot_surface(x1[,2],x1[,1],ypred,X[,2],X[,1],Pm,save)
