#### Problem 7 - Homework 1 for CVEN6833 - Fitting a GLM for binary precipitation of januuary 11th, 1997
#### Author: Alvaro Ossandon


#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)

#data required: 
day=11 # day of January 1997 
save=FALSE # True for saving figure as pdf file;

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")

#### create a folder to save the figures
subDir=toString(day)
if (file.exists(subDir)){
  setwd(file.path(mainDir, subDir))
} else {
  dir.create(file.path(mainDir, subDir))
  setwd(file.path(mainDir, subDir))
}

## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_pcp_daily_1997_01_",day,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)

# convert the precipitation to binary data 
Pm=matrix(0,ncol=1,nrow=length(test[,4]))
n_values<- which(test[,4]>0)
Pm[n_values] =1  #Dependent Variable
X = test[,1:3]# Independent variable.

################################################
##############   I. Fit a GLM   ###############
################################################

bestmod=GLM_fit(Pm,X,"binomial")
##    1    2    3 
## TRUE TRUE TRUE 
##    1    2    3 
## TRUE TRUE TRUE 
## [1] "Results of PRESS for bestfit GLM"
##        glm_press
## logit   344.7705
## probit  345.1326
## [1] 7
summary(bestmod)
## 
## Call:
## glm(formula = Pm ~ ., family = binomial(link = links[which.min(glm_press)]), 
##     data = xx)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1818  -1.1053   0.6241   0.8497   1.5016  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.6759733 10.0312143  -0.067  0.94627    
## Lat          0.5284851  0.1192679   4.431 9.38e-06 ***
## Long         0.1980892  0.0859432   2.305  0.02117 *  
## Elev         0.0007879  0.0002330   3.382  0.00072 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 366.82  on 289  degrees of freedom
## Residual deviance: 334.95  on 286  degrees of freedom
## AIC: 342.95
## 
## Number of Fisher Scoring iterations: 4
Pmhat=bestmod$fitted.values
if (save==TRUE){
pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat)
plot(Pm,Pmhat,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
dev.off() 
}else{
  # Observed versus estimates
  par(mfrow=c(1,1))
  lim=range(Pm,Pmhat)
  plot(Pm,Pmhat,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
}

### ANOVA:
anova(bestmod)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Pm
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev
## NULL                   289     366.82
## Lat   1  19.3720       288     347.45
## Long  1   0.3534       287     347.10
## Elev  1  12.1504       286     334.95
###################################################################
## II. Spatially map the model estimates and the standard error ####
###################################################################
#  read the topography map
x1 = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
names(x1)=c("Lat","Long","Elev")

ypred =predict.glm(bestmod, newdata=x1[,1:3], se.fit=T,type="response")

#Quilt_plotting(x1[,2],x1[,1],ypred)
plot_surface(x1[,2],x1[,1],ypred,test[,2],test[,1],Pm,save,type="binary")