
#### Problem 6 - Homework 1 for CVEN6833
#### Author: Billy Raseman based on the work of Emily Gill and Adam McCurdy 

#### CLEAR MEMORY
rm(list=ls())

#### Prevent Warnings
options(warn=-1)

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
setwd("C:/Users/Billy/Google Drive/CU Boulder Courses (CVEN)/CVEN 6833 Advanced Data Analysis/Homework/Homework 1")
suppressPackageStartupMessages(source("hw1_lib.R"))
source("hw1_lib.R")

Read in data and fit GLM

#### READ IN DATA

## Read data as table from local directory 
precip = read.table("data_files/climatol_ann.txt")      # ann avg precip 246 x 4
grid = read.table("data_files/india_grid_topo.txt")     # high res DEM 3384 x 3
rajeevan = read.table("data_files/rajeevan_grid.txt")   # rajeevan grid 357 x 2

## Organize data into a data frame with names for columns/variables:
precip_dt = data.frame(precip)                          # ann avg precip at 246 locations
colnames(precip_dt) = c("lon", "lat", "elev", "pmm")
india_grid = data.frame(grid)                           # high resolution DEM (more than just India)
colnames(india_grid) = c("lon", "lat", "elev")
raj_grid = data.frame(rajeevan[,2], rajeevan[,1])       # lat/lon for just India
colnames(raj_grid) = c("lon","lat")

### Find the 75th percentile of the annual precip and convert precip into binary exceedance data

p75 = quantile(precip_dt[,4], 0.75)
exceed75 = precip_dt[,4] >= p75
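
# Quick sanity check (a sketch, not part of the original workflow): with a 75th-percentile
# threshold, roughly a quarter of the 246 stations should register an exceedance, and the
# mean of the logical vector gives that fraction directly.
cat(sprintf("Fraction of stations exceeding p75: %.2f\n", mean(exceed75)))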

# Find the best GLM (search over link functions and predictor subsets)

pmm = exceed75
X = precip_dt[,1:3]
family = "binomial"

if (family == "gamma") {
  links = c("log", "inverse", "identity")   
  
  # clean data and remove zeros
  precip_dt = na.omit(precip_dt)
  rownames(precip_dt) = NULL
  precip_dt$pmm[precip_dt$pmm <= 0] = runif(sum(precip_dt$pmm <= 0), 0.0001, 0.001)  # replace non-positive values with small positive draws
  
} else if (family == "binomial"){
  links = c("logit", "probit", "cauchit")
}
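
# Illustrative aside (added for exposition, not part of the original script): the three
# binomial links differ mainly in tail behavior. For a linear predictor eta, the fitted
# probability is plogis(eta) for logit, pnorm(eta) for probit, and pcauchy(eta) for
# cauchit; the cauchit link has the heaviest tails.
eta = seq(-4, 4, by = 2)
rbind(logit = plogis(eta), probit = pnorm(eta), cauchit = pcauchy(eta))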

glm_aic = vector(length = length(links))
N = length(pmm)

combs = leaps(X, pmm, nbest=25)     # get up to 25 subsets for each number of predictors
combos = combs$which

ncombos = length(combos[,1])

xpress = numeric(ncombos)   # preallocate PRESS statistics
xmse = numeric(ncombos)     # preallocate mean squared errors

for(j in 1:length(links)) {
  
  for(i in 1:ncombos) {
    
    xx = X[,combos[i,]]
    xx=as.data.frame(xx)
    if (family == "gamma") {
      zz=glm(pmm ~ ., data=xx, family = Gamma(link=links[j]), maxit=500)
    } else if (family == "binomial"){
      zz=glm(pmm ~ ., data=xx, family = binomial(link=links[j]), maxit=500)
    } else {print("Error!")}
    
    xpress[i]=PRESS(zz)
    
    xmse[i] = sum((zz$res)^2) / (N - length(zz$coef))
  }
  
  
  # Stepwise selection by AIC, starting from the full model fit with this link
  zms=stepAIC(zz, scope=list(upper = ~., lower = ~1), trace=FALSE)
  glm_aic[j] = zms$aic
}
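
# Optional check (a sketch, not part of the original selection logic): the PRESS statistics
# stored above could also rank the predictor subsets; after the loop, xpress holds the
# values for the last link tried.
best_combo = which.min(xpress)
print(colnames(X)[combos[best_combo, ]])   # predictors in the lowest-PRESS subset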

aic_df = data.frame(glm_aic)
rownames(aic_df) = links[1:3]

print("Results of AIC for bestfit GLM")
## [1] "Results of AIC for bestfit GLM"
print(aic_df)
##          glm_aic
## logit   195.8791
## probit  198.0058
## cauchit 189.1644
sprintf("Choosing the GLM which minimizes AIC: %s family and %s link function.", family, links[which.min(glm_aic)])
## [1] "Choosing the GLM which minimizes AIC: binomial family and cauchit link function."
if (family == "gamma") {
  glmfit = glm(pmm ~ ., data = X, family = Gamma(link=links[which.min(glm_aic)]))
} else if (family == "binomial") {
  glmfit = glm(pmm ~ ., data = X, family = binomial(link=links[which.min(glm_aic)]))
} else { 
  print("Error!")
}
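
# A minimal diagnostic sketch (assuming the binomial fit and a 0.5 cutoff; not in the
# original script) comparing fitted exceedance probabilities against the observed
# binary exceedances.
phat = predict(glmfit, type = "response")
table(observed = pmm, predicted = phat >= 0.5)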

Fit variogram and perform MCMC

# obtain initial estimate of effective range (phi)
geod = as.geodata(cbind(glmfit$residuals,X$lon,X$lat),data.col=1)
vg = variog(geod,breaks=seq(50,10000,50))
## variog: computing omnidirectional variogram
phi.val = median(vg$u) 
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
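
# Optional visual check (a sketch, not in the original script): plot the empirical
# variogram to confirm that the chosen phi bounds bracket the apparent decorrelation range.
plot(vg, main = "Empirical variogram of GLM residuals")
abline(v = c(phi.lo, phi.hi), lty = 2)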

# Input number of samples
n.samples <- 500
# coordinates of observations
coords <- cbind(X$lon,X$lat) 
# prior distributions defined (limited distributions available... see help(spLM))
# NUGGET SET TO ZERO
priors <- list("beta.flat", "phi.unif"=c(phi.lo,phi.hi),
               "sigma.sq.ig"=c(100, 100))
# starting values for MCMC resampling
# NUGGET SET TO ZERO
starting <- list("phi"=phi.val, "sigma.sq"=500)#, "tau.sq"=100)
# adjustment factor for MCMC sampling routine
# NUGGET SET TO ZERO
tuning <- list("phi"=1, "sigma.sq"=1)#, "tau.sq"=100) # WJR why set these to 1??
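# (Note, added for clarity: in spBayes::spLM the tuning values are the variances of the
# Metropolis normal proposal distributions for phi and sigma.sq; 1 is just an initial
# guess, judged against the acceptance rates reported during sampling.)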

# THIS WILL TAKE A WHILE (~ 10 MINUTES?)
bat.fm <- spLM(formula=pmm~elev, data=X, coords=coords, priors=priors, tuning=tuning, starting=starting, cov.model = "exponential", n.samples = n.samples, verbose = TRUE, n.report = 50)
## ----------------------------------------
##  General model description
## ----------------------------------------
## Model fit with 246 observations.
## 
## Number of covariates 2 (including intercept if specified).
## 
## Using the exponential spatial correlation model.
## 
## Number of MCMC samples 500.
## 
## tau.sq not included in the model (i.e., no nugget model).
## 
## Priors and hyperpriors:
##  beta flat.
##  sigma.sq IG hyperpriors shape=100.00000 and scale=100.00000
##  phi Unif hyperpriors a=18.75000 and b=131.25000
## -------------------------------------------------
##      Sampling
## -------------------------------------------------
## Sampled: 50 of 500, 10.00%
## Report interval Metrop. Acceptance rate: 28.00%
## Overall Metrop. Acceptance rate: 28.00%
## -------------------------------------------------
## Sampled: 100 of 500, 20.00%
## Report interval Metrop. Acceptance rate: 10.00%
## Overall Metrop. Acceptance rate: 19.00%
## -------------------------------------------------
## Sampled: 150 of 500, 30.00%
## Report interval Metrop. Acceptance rate: 10.00%
## Overall Metrop. Acceptance rate: 16.00%
## -------------------------------------------------
## Sampled: 200 of 500, 40.00%
## Report interval Metrop. Acceptance rate: 2.00%
## Overall Metrop. Acceptance rate: 12.50%
## -------------------------------------------------
## Sampled: 250 of 500, 50.00%
## Report interval Metrop. Acceptance rate: 4.00%
## Overall Metrop. Acceptance rate: 10.80%
## -------------------------------------------------
## Sampled: 300 of 500, 60.00%
## Report interval Metrop. Acceptance rate: 2.00%
## Overall Metrop. Acceptance rate: 9.33%
## -------------------------------------------------
## Sampled: 350 of 500, 70.00%
## Report interval Metrop. Acceptance rate: 16.00%
## Overall Metrop. Acceptance rate: 10.29%
## -------------------------------------------------
## Sampled: 400 of 500, 80.00%
## Report interval Metrop. Acceptance rate: 6.00%
## Overall Metrop. Acceptance rate: 9.75%
## -------------------------------------------------
## Sampled: 450 of 500, 90.00%
## Report interval Metrop. Acceptance rate: 10.00%
## Overall Metrop. Acceptance rate: 9.78%
## -------------------------------------------------
## Sampled: 500 of 500, 100.00%
## Report interval Metrop. Acceptance rate: 10.00%
## Overall Metrop. Acceptance rate: 9.80%
## -------------------------------------------------
# burn-in samples
bat.fm <- spRecover(bat.fm, start=((1/3)*n.samples)+1, thin=1)
## -------------------------------------------------
##      Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 333, 29.73%
## Sampled: 199 of 333, 59.76%
## Sampled: 299 of 333, 89.79%

Plot posterior pdfs, the predicted exceedance probability, and its standard error

# plot posterior pdfs
beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)

plot(theta)
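
# Numerical posterior summary sketch (assumption: coda's default summary quantiles),
# complementing the trace/density plots above.
round(summary(beta)$quantiles, 3)
round(summary(theta)$quantiles, 3)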

## predict precipitation on Rajeevan grid

# get rajeevan grid
xv1=data.frame(india_grid[,1:2])
xv2=data.frame(raj_grid[,1:2])

zzint=row.match(xv2,xv1)
indexnotna = which(!is.na(zzint))
indexna = which( is.na(zzint) )

## create the new rajeevan grid
rajeevgridel = cbind(raj_grid[indexnotna,1:2],india_grid[zzint[indexnotna],3])  
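
# Sanity-check sketch (not in the original script): report how many Rajeevan grid cells
# found a matching DEM cell and therefore carry an elevation value.
cat(sprintf("%d of %d Rajeevan grid points matched the DEM\n",
            length(indexnotna), nrow(raj_grid)))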

# 1) try doing it from first principles instead of using spPredict() (see Homework 1; do the same, but with an ensemble)
# 2) after you get the parameters, use the Krig() command
n.samples2 = floor(n.samples*(2/3))

# zz = list(n.samples2)
# for (i in 1:n.samples2){
# zz[[i]] = Krig(precip_dt[,1:2],glmfit$residuals,rho=theta[i,1],theta=theta[i,2],m=1)
# }
y = matrix(nrow=length(rajeevgridel[,1]), ncol=n.samples2)
yse = matrix(nrow=length(rajeevgridel[,1]), ncol=n.samples2)

for (i in 1:n.samples2){
  print(i)
  zz = Krig(precip_dt[,1:2],glmfit$residuals,rho=theta[i,1],theta=theta[i,2],m=1)
  y2 = predict.Krig(zz,x=rajeevgridel[,1:2],drop.Z=TRUE) 
  yse[,i] = predictSE(zz, x=rajeevgridel[,1:2], drop.Z=TRUE) 
  y1 = beta[i,1] + beta[i,2]*rajeevgridel[,3]
  y[,i] = y1+y2
}
## [1] 1
## [1] 2
## ...
## [1] 333
## (loop counter printed once per retained posterior sample; intermediate lines omitted)
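
# Optional extension sketch (assumption: a 90% interval): the ensemble stored in y also
# supports cell-wise credible intervals on the predicted surface, not just the mean.
ci = t(apply(y, 1, quantile, probs = c(0.05, 0.95)))
head(ci)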
mean.y = apply(y, 1, FUN = mean)/100 # take the mean and convert from percent to fraction
mean.yse = apply(yse, 1, FUN = mean)/100 # take the mean and convert from percent to fraction

par(mfrow=c(1,1))
hist(y, main = "Histogram of Precipitation Posterior")

# plot precipitation prediction (mean value from the Bayesian model)
quilt.plot(rajeevgridel[,1],rajeevgridel[,2],mean.y,xlab="Longitude (deg)",ylab="Latitude (deg)",main='Predict. Probability for 75th Percentile Precip. Exceedance')
world(add=T,lwd=1)

# plot precipitation prediction (mean standard error from the Bayesian model)
quilt.plot(rajeevgridel[,1],rajeevgridel[,2],mean.yse,xlab="Longitude (deg)",ylab="Latitude (deg)",main='Predict. Standard Error for 75th Percentile Precip. Exceedance')
world(add=T,lwd=1)