#### Problem 8 - Homework 1 for CVEN6833 - Fitting a local GLM for binary precipitation of januuary 11th, 1997
#### Author: Alvaro Ossandon
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#data required:
day=11 # day of January 1997
save=FALSE # True for saving figure as pdf file;
#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")
#### create a folder to save the figures
subDir=paste(toString(day),"/Log_GLM",sep = "")
if (file.exists(subDir)){
setwd(file.path(mainDir, subDir))
} else {
dir.create(file.path(mainDir, subDir))
setwd(file.path(mainDir, subDir))
}
## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_pcp_daily_1997_01_",day,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)
# convert the precipitation to binary data
Pm=matrix(0,ncol=1,nrow=length(test[,4]))
n_values<- which(test[,4]>0)
Pm[n_values] =1 #Dependent Variable
X = test[,1:3]# Independent variable.
##############################################
########### I. Local GLM method ############
links = c("logit")
N = length(Pm)
combs = leaps(X,Pm, nbest=25) # GEt upto 25 combinations for each
# number of predictors
combos = combs$which
ncombos = length(combos[,1])
gcvs_m=1:ncombos
glm_gcv=vector(length = length(links))
best_comb=vector(length = length(links))
for(j in 1:length(links)) { sprintf("j",j)
for(i in 1:ncombos) {
xx = X[,combos[i,]]
xx=as.data.frame(xx)
zz=locpoly_fit(Pm, xx,family="binomial", link=links[j], glm=TRUE,plot=TRUE)
gcvs_m[i] = zz$call$gcv
}
# Test using GCV objective function
glm_gcv[j]=min(gcvs_m)
best_comb[j]=which.min(gcvs_m)
}
bestmod=locpoly_fit(Pm, X[,combos[best_comb[which.min(glm_gcv)],]],family="binomial", link=links[which.min(glm_gcv)], glm=TRUE,plot=TRUE)
bestmod$combo=combos[best_comb[which.min(glm_gcv)],]
X=X[,bestmod$combo]
summary(bestmod)
## Estimation type: Local Likelihood - Binomial
##
## Call:
## locfit(formula = Pm ~ ., data = X, alpha = 0.6, maxk = 10000,
## deg = 2, kern = "bisq", scale = T, family = "binomial", link = "logit",
## gcv = 0.982497903228272)
##
## Number of data points: 290
## Independent variables: Lat Long Elev
## Evaluation structure: Rectangular Tree
## Number of evaluation points: 123
## Degree of fit: 2
## Fitted Degrees of Freedom: 27.666
#Estimate the value of the function at the observed locations..
Pmhat=predict(bestmod,X,se.fit=T)
if (save==TRUE){
pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat$fit)
plot(Pm,Pmhat$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
dev.off()
}else{
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat$fit)
plot(Pm,Pmhat$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
}

###################################################################
## II. Spatially map the model estimates and the standard error ####
###################################################################
# read the topography map
x1 = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
names(x1)=c("Lat","Long","Elev")
ypred <- predict(bestmod,newdata=x1,se.fit=T)
#Quilt_plotting(x1[,2],x1[,1],ypred)
plot_surface(x1[,2],x1[,1],ypred,test[,2],test[,1],Pm,save,type="binary")
