#### Problem 8 - Homework 1 for CVEN6833 - Fitting a local GLM for binary precipitation of januuary 11th, 1997
#### Author: Alvaro Ossandon


#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)

#data required: 
day=11 # day of January 1997 
save=FALSE # True for saving figure as pdf file;

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")

#### create a folder to save the figures
subDir=paste(toString(day),"/Log_GLM",sep = "")  
if (file.exists(subDir)){
  setwd(file.path(mainDir, subDir))
} else {
  dir.create(file.path(mainDir, subDir))
  setwd(file.path(mainDir, subDir))
}

## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_pcp_daily_1997_01_",day,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)
# convert the precipitation to binary data 
Pm=matrix(0,ncol=1,nrow=length(test[,4]))
n_values<- which(test[,4]>0)
Pm[n_values] =1  #Dependent Variable
X = test[,1:3]# Independent variable.

##############################################
########### I. Local GLM method   ############
links = c("logit")
N = length(Pm)
combs = leaps(X,Pm, nbest=25)     #  GEt upto 25 combinations for each
# number of predictors
combos = combs$which
ncombos = length(combos[,1])
gcvs_m=1:ncombos
glm_gcv=vector(length = length(links))
best_comb=vector(length = length(links))
for(j in 1:length(links)) { sprintf("j",j)
  
  for(i in 1:ncombos) {
    xx = X[,combos[i,]]
    xx=as.data.frame(xx)
    zz=locpoly_fit(Pm, xx,family="binomial", link=links[j], glm=TRUE,plot=TRUE)
    gcvs_m[i] = zz$call$gcv
  }
  # Test using GCV objective function
  glm_gcv[j]=min(gcvs_m)
  best_comb[j]=which.min(gcvs_m)
}
bestmod=locpoly_fit(Pm, X[,combos[best_comb[which.min(glm_gcv)],]],family="binomial", link=links[which.min(glm_gcv)], glm=TRUE,plot=TRUE)
bestmod$combo=combos[best_comb[which.min(glm_gcv)],]

X=X[,bestmod$combo]
summary(bestmod)
## Estimation type: Local Likelihood - Binomial 
## 
## Call:
## locfit(formula = Pm ~ ., data = X, alpha = 0.6, maxk = 10000, 
##     deg = 2, kern = "bisq", scale = T, family = "binomial", link = "logit", 
##     gcv = 0.982497903228272)
## 
## Number of data points:  290 
## Independent variables:  Lat Long Elev 
## Evaluation structure: Rectangular Tree 
## Number of evaluation points:  123 
## Degree of fit:  2 
## Fitted Degrees of Freedom:  27.666
#Estimate the value of the function at the observed locations..
Pmhat=predict(bestmod,X,se.fit=T)
if (save==TRUE){
  pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
  # Observed versus estimates
  par(mfrow=c(1,1))
  lim=range(Pm,Pmhat$fit)
  plot(Pm,Pmhat$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
  dev.off() 
  
}else{
  # Observed versus estimates
  par(mfrow=c(1,1))
  lim=range(Pm,Pmhat$fit)
  plot(Pm,Pmhat$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
  
}

###################################################################
## II. Spatially map the model estimates and the standard error ####
###################################################################
#  read the topography map
x1 = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
names(x1)=c("Lat","Long","Elev")
ypred <- predict(bestmod,newdata=x1,se.fit=T)
#Quilt_plotting(x1[,2],x1[,1],ypred)
plot_surface(x1[,2],x1[,1],ypred,test[,2],test[,1],Pm,save,type="binary")