#### Problem 3 - Homework 1 for CVEN6833 - Fitting a General linear model for July
#### Author: Alvaro Ossandon

#### CLEAR MEMORY
rm(list=ls())

#### Clear the R console
cat("\f")

#### Prevent Warnings
options(warn=-1)

#data required: 
Month=7 # 1: Monthly precipitation of January; 2: Monthly precipitation of July
save=FALSE # True for saving figure as pdf file;

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")

#### create a folder to save the figures
if (Month==1){
  diraux=paste(mainDir,"/Montly_Precep_January",sep = "")  
}else{
  diraux=paste(mainDir,"/Montly_Precep_July",sep = "")
}
setwd(diraux)
subDir="GLM_FIT"
if (file.exists(subDir)){
  setwd(file.path(diraux, subDir))
} else {
  dir.create(file.path(diraux, subDir))
  setwd(file.path(diraux, subDir))
}

## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_monthly_precip_0",Month,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)
Pm = test[,4]     #Dependent Variable
X = test[,1:3]# Independent variable.
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip =Pm

# Basic Scatterplot Matrix
if (save==TRUE){
  pdf("Simple Scatterplot Matrix.pdf") # save figure
  pairs(~lon+lat+elev+precip,data=test, 
        main="Simple Scatterplot Matrix")
  dev.off() 
} else{
  pairs(~lon+lat+elev+precip,data=test, 
       main="Simple Scatterplot Matrix")
}

################################################
##############   ii. Fit a GLM   ###############
################################################
##Gamma and Gaussian distribution are tested

bestmod_gamma=GLM_fit(Pm,X,"gamma",Month)
## [1] "Results of PRESS for bestfit GLM"
##          glm_press
## log       39.08097
## inverse   41.09635
## identity  36.36975
## [1] 7
bestmod_gau=GLM_fit(Pm,X,"gaussian",Month)
## [1] "Results of PRESS for bestfit GLM"
##          glm_press
## identity  103698.9
## [1] 7
# Test using PRESS objective function
if (bestmod_gamma$Call$PRESS<bestmod_gau$Call$PRESS){
  bestmod=bestmod_gamma
  bestmod$Call$Press=PRESS(bestmod)
} else{
  bestmod=bestmod_gau
}
summary(bestmod)
## 
## Call:
## glm(formula = Pm ~ ., family = Gamma(link = links[which.min(glm_press)]), 
##     data = xx)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.07807  -0.18525  -0.03475   0.14304   1.01844  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.021e+03  4.233e+01  24.124   <2e-16 ***
## Lat         -4.907e+00  5.637e-01  -8.705   <2e-16 ***
## Long         7.586e+00  3.371e-01  22.505   <2e-16 ***
## Elev         1.260e-02  1.049e-03  12.013   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.07470296)
## 
##     Null deviance: 65.005  on 445  degrees of freedom
## Residual deviance: 35.612  on 442  degrees of freedom
## AIC: 3665.4
## 
## Number of Fisher Scoring iterations: 8
bestmod$Call$Press
## [1] 36.36975
Pmhat=bestmod$fitted.values
if (save==TRUE){
  pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
  # Observed versus estimates
  par(mfrow=c(1,1))
  lim=range(Pm,Pmhat)
  plot(Pm,Pmhat,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
  abline(a=0,b=1)
  dev.off() 
}else{
  # Observed versus estimates
  par(mfrow=c(1,1))
  lim=range(Pm,Pmhat)
  plot(Pm,Pmhat,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
  abline(a=0,b=1)
}

###################################################################
######## III. Anova and model diagnostics of your best model
###################################################################
### ANOVA:
anova(bestmod)
## Analysis of Deviance Table
## 
## Model: Gamma, link: links[which.min(glm_press)]
## 
## Response: Pm
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev
## NULL                   445     65.005
## Lat   1   2.6476       444     62.358
## Long  1  16.3407       443     46.017
## Elev  1  10.4050       442     35.612
### MODEL DIAGNOSTICS:
#yy = precip_dt[,4]         # true value of y based on data
Pmest = Pmhat  # model's predicted values of Pm 
nvar = 2                   # number of variables 
mod_diagnostics(Pm, Pmest, nvar,save)

###################################################################
#### IV. Compute cross-validated and fitted estmiates at each ####
####      observation. Plot them against the observed values.  ####
###################################################################

loocv_func(bestmod,X,Pm,save)

###################################################################
#### V. Compute cross-validated and fitted estmiates at each ####
####      observation. Plot them against the observed values.  ####
###################################################################

Drop_10_pred(bestmod,X,Pm,save)

###################################################################
## VI. Spatially map the model estimates and the standard error ####
###################################################################
#  read the topography map
x1 = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
names(x1)=c("Lat","Long","Elev")

ypred =predict.glm(bestmod, newdata=x1[,1:3], se.fit=T,type="response")

#Quilt_plotting(x1[,2],x1[,1],ypred)
plot_surface(x1[,2],x1[,1],ypred,X[,2],X[,1],Pm,save)