3. Bayesian Hierarchical Spatial Model - Binomial
Develop a Bayesian Hierachical Spatial Model for problem 2. For this the first level is a Binomial distribution.
You can use the steps/code from Andrew Verdin at http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/bayes/spatial-bayes/\ And/or write your own code in RJAGS

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
graphics.off()
.pardefault <- par()
par(ask = F)

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")

Read data

# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1])  # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)

## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")

## Compute the 75th percentile and categorize precipitation to a binary variable
q = quantile(prec_obs, c(0.75))
prec_bin = rep(1, length(prec_obs))

for (i in 1:length(prec_obs)) {
  if (prec_obs[i] < q) {
    prec_bin[i] = 0
  }
}

Fit GLM

glmfit = GLM_fit(prec_bin, X, family = "binomial")
## [1] "Results of AIC for bestfit GLM"
##       glm_val
## logit 60.9066
print(summary(glmfit))
## 
## Call:
## glm(formula = prec_obs ~ ., family = binomial(link = links[which.min(glm_val)]), 
##     data = xx)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8316  -0.4823  -0.2814   0.8056   2.2909  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -54.4745    13.2884  -4.099 4.14e-05 ***
## Long         -0.4906     0.1208  -4.061 4.90e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 83.708  on 72  degrees of freedom
## Residual deviance: 56.907  on 71  degrees of freedom
## AIC: 60.907
## 
## Number of Fisher Scoring iterations: 5

Fit variogram and perform MCMC

geod = as.geodata(cbind(glmfit$residuals, X$Long, X$Lat), data.col = 1)
vg = variog(geod,breaks = seq(50,10000,50))
## variog: computing omnidirectional variogram
sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
# Input number of samples
n.samples = 1000
# Coordinates of observations
coords = cbind(X$Long, X$Lat)
# Prior Distributions Defined
priors = list("beta.flat", "phi.unif" = c(phi.lo,phi.hi),"sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(0.01,0.001))
# Starting values for MCMC resampling
starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1)
# Adjustment factor for MCMC sampling routine
tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1)

# Perform Monte Carlo Markov Chain Analysis
bat.fm = spLM(prec_bin ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = F, n.report = 50)
# Add some small amount to duplicated coordinates
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# Burn-In Samples
bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1)
## -------------------------------------------------
##      Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 666, 14.86%
## Sampled: 199 of 666, 29.88%
## Sampled: 299 of 666, 44.89%
## Sampled: 399 of 666, 59.91%
## Sampled: 499 of 666, 74.92%
## Sampled: 599 of 666, 89.94%

Plot posterior PDFs

beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)

See attached PDF for plot of beta

plot(theta)

print(summary(window(bat.fm$p.beta.recover.samples)))
## 
## Iterations = 1:666
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 666 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                   Mean        SD  Naive SE Time-series SE
## (Intercept) -7.491e+00 2.8292409 1.096e-01      1.096e-01
## Long        -7.010e-02 0.0254592 9.865e-04      9.865e-04
## Lat          6.709e-03 0.0332811 1.290e-03      1.290e-03
## Elev        -2.501e-05 0.0001761 6.824e-06      6.824e-06
## 
## 2. Quantiles for each variable:
## 
##                   2.5%        25%        50%        75%      97.5%
## (Intercept) -1.316e+01 -9.3676628 -7.615e+00 -5.614e+00 -1.8202449
## Long        -1.187e-01 -0.0871249 -7.105e-02 -5.287e-02 -0.0183743
## Lat         -5.801e-02 -0.0155504  6.424e-03  2.722e-02  0.0748346
## Elev        -3.729e-04 -0.0001377 -2.997e-05  7.975e-05  0.0003364

Plot precipitation posterior PDF

elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat) 

y = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))
yse = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))

for (i in 1:nrow(beta)) {
  zz = Krig(X[,1:2], glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2])
  y2 = predict(zz, x = xps, drop.Z = T)
  yse[,i] = predictSE(zz, x = xps, drop.Z = T)
  y1 = beta[i,1] + beta[i,2]*lon + beta[i,3]*lat + beta[i,4]*elev_hr
  y[,i] = y1 + y2
}
y=logit2prob(y) #to obtain the real value
yse=logit2prob(yse) #to obtain the real value
mean.y = apply(y, 1, FUN = mean)    # Take the mean and convert from % to fraction
mean.yse = apply(yse, 1, FUN = mean)  # Take the mean and convert from % to fraction

g1 = ggplot() + geom_histogram(aes(as.vector(y)), fill = "gray50", col = "black") +
  labs(title = "Histogram of Precipitation Posterior", x = "Predictions") +
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
show(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Spatially map precipitation estimate and standard error on high-resolution grid

pred = cbind(mean.y,mean.yse)
ypred = data.frame(pred)
names(ypred) = c("fit","se.fit")
par(.pardefault)
Quilt_plotting(xps[,1],xps[,2],ypred,X[,1],X[,2],prec_bin, type = "binary")

Comments