#### Anna Starodubtseva
#### CVEN 6833, HW 01
#### Problem 3 - GLM
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
# Other
save = FALSE
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 01/R"
setwd(mainDir)
suppressPackageStartupMessages(source("hw1_library.R"))
source("hw1_library.R")
## Read data
# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1]) # Mean of location columns# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)
## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
lon = X[,1]
lat = X[,2]
elev = X[,3]
# Add interaction terms
X$LongLat = X[,1]*X[,2]
X$LongElev = X[,1]*X[,3]
X$LatElev = X[,2]*X[,3]
## Scatterplot to show variable relationships
precip = prec_obs;
if(save==TRUE){
pdf("Simple Scatterplot Matrix.pdf") # save figure
dev.new()
pairs(~lon+lat+elev+precip,data=X,
main="Simple Scatterplot Matrix")
dev.off()
}else{
pairs(~lon+lat+elev+precip,data=X,
main="Simple Scatterplot Matrix")
}

#########################################################
####### (i) Fit a 'best' generalized linear model #######
#########################################################
mod_func = vector(length = 2)
bestmod_gamma =GLM_fit(prec_obs,X,"gamma")
## [1] "Results of AIC for bestfit GLM"
## glm_val
## log 479.4600
## inverse 478.7714
## identity 480.4798
mod_func[1] = AIC(bestmod_gamma)
bestmod_gauss = GLM_fit(prec_obs,X,"gaussian")
## [1] "Results of AIC for bestfit GLM"
## glm_val
## identity 509.5835
mod_func[2] = AIC(bestmod_gauss)
if (which.min(mod_func) == 1) {
bestmod = bestmod_gamma
} else {
bestmod = bestmod_gauss
}
print(summary(bestmod))
##
## Call:
## glm(formula = prec_obs ~ ., family = Gamma(link = links[which.min(glm_val)]),
## data = xx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8304 -0.2376 -0.0651 0.1635 0.8467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.689e-02 5.822e-03 8.054 1.45e-11 ***
## Elev 2.215e-04 3.927e-05 5.639 3.35e-07 ***
## LongElev 2.036e-06 3.637e-07 5.598 3.94e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1156084)
##
## Null deviance: 11.0656 on 72 degrees of freedom
## Residual deviance: 7.4635 on 70 degrees of freedom
## AIC: 478.77
##
## Number of Fisher Scoring iterations: 5
#########################################################
###### (ii) Show the scatterplot of observed and modeled precipitation along with the 1:1 line
#########################################################
prec_hat=bestmod$fitted.values
if (save==TRUE){
pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(prec_obs,prec_hat)
dev.new()
plot(prec_obs,prec_hat,xlab="Actual Precipitation (mm)",ylab="Estimated Precipitation (mm)",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)
dev.off()
}else {
# Observed versus estimates
par(mfrow=c(1,1))
lim=range(prec_obs,prec_hat)
plot(prec_obs,prec_hat,xlab="Actual Precipitation (mm)",ylab="Estimated Precipitation (mm)",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)
}

#########################################################
####### (iii) Perform ANOVA and model diagnostics #######
#########################################################
print(anova(bestmod))
## Analysis of Deviance Table
##
## Model: Gamma, link: inverse
##
## Response: prec_obs
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 72 11.0656
## Elev 1 0.1361 71 10.9295
## LongElev 1 3.4660 70 7.4635
mod_diagnostics(prec_obs,prec_hat,2,save)

#########################################################
####### (iv) Compute drop-one cross-validated estimates from the best model and scatterplot them against the observed values
#########################################################
loocv_func(bestmod,X,prec_obs,save)

#########################################################
####### (v) Drop 10% of observations, fit the model to the rest of the data and predict the dropped points. Compute RMSE and correlation and show them as boxplots #######
#########################################################
Drop_10_pred(bestmod,X,prec_obs,save)

#########################################################
####### (vi) Spatially map the model estimates and standard error from the best model on the high-resolution grid
#########################################################
elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
# Add interaction terms
elev_grid$LongLat = elev_grid[,1]*elev_grid[,2]
elev_grid$LongElev = elev_grid[,1]*elev_grid[,3]
elev_grid$LatElev = elev_grid[,2]*elev_grid[,3]
# Predict precipitation based on best model
prec_pred = predict.glm(bestmod, elev_grid, se.fit = T, type = "response")
# Plot surface
plot_surface(elev_grid[,1],elev_grid[,2],prec_pred,X[,1],X[,2],prec_obs,save)


- GLM using a Gamma distribution captures more variation in areas of greater precipitation
- Residuals increase as magnitude of precipitation increases
- Greater magnitude of standard error compared to simple linear regression