#### Problem 3 - Homework 1 for CVEN6833 - Fitting a General linear model for July
#### Author: Alvaro Ossandon
#### CLEAR MEMORY
rm(list=ls())
#### Clear the R console
cat("\f")
#### Prevent Warnings
options(warn=-1)
#data required:
Month=7 # 1: Monthly precipitation of January; 2: Monthly precipitation of July
save=FALSE # True for saving figure as pdf file;
#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")
#### create a folder to save the figures
if (Month==1){
diraux=paste(mainDir,"/Montly_Precep_January",sep = "")
}else{
diraux=paste(mainDir,"/Montly_Precep_July",sep = "")
}
setwd(diraux)
subDir="GLM_FIT"
if (file.exists(subDir)){
setwd(file.path(diraux, subDir))
} else {
dir.create(file.path(diraux, subDir))
setwd(file.path(diraux, subDir))
}
## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_monthly_precip_0",Month,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)
Pm = test[,4] #Dependent Variable
X = test[,1:3]# Independent variable.
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip =Pm
# Basic Scatterplot Matrix
if (save==TRUE){
pdf("Simple Scatterplot Matrix.pdf") # save figure
pairs(~lon+lat+elev+precip,data=test,
main="Simple Scatterplot Matrix")
dev.off()
} else{
pairs(~lon+lat+elev+precip,data=test,
main="Simple Scatterplot Matrix")
}

################################################
############## ii. Fit a GLM ###############
################################################
##Gamma and Gaussian distribution are tested
bestmod_gamma=GLM_fit(Pm,X,"gamma",Month)
## [1] "Results of PRESS for bestfit GLM"
## glm_press
## log 39.08097
## inverse 41.09635
## identity 36.36975
## [1] 7
bestmod_gau=GLM_fit(Pm,X,"gaussian",Month)
## [1] "Results of PRESS for bestfit GLM"
## glm_press
## identity 103698.9
## [1] 7
# Test using PRESS objective function
if (bestmod_gamma$Call$PRESS<bestmod_gau$Call$PRESS){
bestmod=bestmod_gamma
bestmod$Call$Press=PRESS(bestmod)
} else{
bestmod=bestmod_gau
}
summary(bestmod)
##
## Call:
## glm(formula = Pm ~ ., family = Gamma(link = links[which.min(glm_press)]),
## data = xx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.07807 -0.18525 -0.03475 0.14304 1.01844
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.021e+03 4.233e+01 24.124 <2e-16 ***
## Lat -4.907e+00 5.637e-01 -8.705 <2e-16 ***
## Long 7.586e+00 3.371e-01 22.505 <2e-16 ***
## Elev 1.260e-02 1.049e-03 12.013 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.07470296)
##
## Null deviance: 65.005 on 445 degrees of freedom
## Residual deviance: 35.612 on 442 degrees of freedom
## AIC: 3665.4
##
## Number of Fisher Scoring iterations: 8
bestmod$Call$Press
## [1] 36.36975
Pmhat=bestmod$fitted.values
if (save==TRUE){
pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat)
plot(Pm,Pmhat,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)
dev.off()
}else{
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(Pm,Pmhat)
plot(Pm,Pmhat,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)
}

###################################################################
######## III. Anova and model diagnostics of your best model
###################################################################
### ANOVA:
anova(bestmod)
## Analysis of Deviance Table
##
## Model: Gamma, link: links[which.min(glm_press)]
##
## Response: Pm
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 445 65.005
## Lat 1 2.6476 444 62.358
## Long 1 16.3407 443 46.017
## Elev 1 10.4050 442 35.612
### MODEL DIAGNOSTICS:
#yy = precip_dt[,4] # true value of y based on data
Pmest = Pmhat # model's predicted values of Pm
nvar = 2 # number of variables
mod_diagnostics(Pm, Pmest, nvar,save)

###################################################################
#### IV. Compute cross-validated and fitted estmiates at each ####
#### observation. Plot them against the observed values. ####
###################################################################
loocv_func(bestmod,X,Pm,save)

###################################################################
#### V. Compute cross-validated and fitted estmiates at each ####
#### observation. Plot them against the observed values. ####
###################################################################
Drop_10_pred(bestmod,X,Pm,save)

###################################################################
## VI. Spatially map the model estimates and the standard error ####
###################################################################
# read the topography map
x1 = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
names(x1)=c("Lat","Long","Elev")
ypred =predict.glm(bestmod, newdata=x1[,1:3], se.fit=T,type="response")
#Quilt_plotting(x1[,2],x1[,1],ypred)
plot_surface(x1[,2],x1[,1],ypred,X[,2],X[,1],Pm,save)
