2. Logistic Regression
Compute the 75th percentile of the average winter 3-day maximum precipitation across all the stations and using this as the threshold, categrize the annual precipitation at each station to a binary variable 0 if annual precipitation is less than the threshold, 1 otherwise.
(i) Fit a ‘best’ GLM (i.e. logistic regression) with the appropriate link function using one of the objective functions. Test the model goodness using ANOVA
(ii) Estimate the function on the grid and plot the surface. Also plot the standard error.
(iii) Repeat (i) and (ii) with Local GLM

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
graphics.off()

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")

Read data

# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1])  # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)

## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
# Add interaction terms
# X$LongLat = X[,1]*X[,2]
# X$LongElev = X[,1]*X[,3]
# X$LatElev = X[,2]*X[,3]

## Compute the 75th percentile and categorize precipitation to a binary variable
q = quantile(prec_obs, c(0.75))
prec_bin = rep(1, length(prec_obs))

for (i in 1:length(prec_obs)) {
  if (prec_obs[i] < q) {
    prec_bin[i] = 0
  }
}

Fit GLM

glmfit = GLM_fit(prec_bin, X, family = "binomial")
## [1] "Results of AIC for bestfit GLM"
##       glm_val
## logit 60.9066
print(summary(glmfit))
## 
## Call:
## glm(formula = prec_obs ~ ., family = binomial(link = links[which.min(glm_val)]), 
##     data = xx)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8316  -0.4823  -0.2814   0.8056   2.2909  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -54.4745    13.2884  -4.099 4.14e-05 ***
## Long         -0.4906     0.1208  -4.061 4.90e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.708  on 72  degrees of freedom
## Residual deviance: 56.907  on 71  degrees of freedom
## AIC: 60.907
## 
## Number of Fisher Scoring iterations: 5

Plot observed vs. predicted precipitation

prec_pred = glmfit$fitted.values
par(mfrow=c(1,1))
lim=range(prec_bin,prec_pred)
plot(prec_bin,prec_pred,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)

Test model goodness using ANOVA

anova(glmfit)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: prec_obs
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev
## NULL                    72     83.708
## Long  1   26.802        71     56.907

(ii) Estimate the function on the grid and plot the surface. Also plot the standard error

elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat) 

ypred = predict(glmfit, newdata = elev_grid, se.fit = T, type = "response")
Quilt_plotting(xps[,1], xps[,2], ypred, X[,1], X[,2], prec_bin, type = "binary")

(iii) Repeat (i) and (ii) with Local GLM

Plot observed vs. predicted precipitation

prec_pred = predict(locfit, Xnew, se.fit = T)
par(mfrow=c(1,1))
lim=range(prec_bin,prec_pred$fit)
plot(prec_bin,prec_pred$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)

Test model goodness using ANOVA

loc_Ftest(prec_bin,prec_pred, Xnew, locfit)
## [1] "F-test:"
## [1] "Reject the Null because F(local poly) = 4.90 > 2.52 = F(linear model)."

Estimate the function on the grid and plot the surface. Also plot the standard error

ypred = predict(locfit, newdata = elev_grid, se.fit = T, type = "response")
Quilt_plotting(xps[,1], xps[,2], ypred, X[,1], X[,2], prec_bin, type = "binary")

Comments