1. Bayesian Hierarchical Spatial Model - Problem 9 from HW1
In the Bayesian models, plot the posterior histogram/PDF of the parameters; spatial maps of posterior mean and standard error. See Verdin et al. 2015 for tips

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
graphics.off()

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")

Read data

# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1])  # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)

## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip = prec_obs   # Convert precipitation to cm

xs = cbind(lon, lat)

Fit GLM

glmfit = glm(prec_obs ~ ., data = X, family = Gamma(link = "log"))
print(summary(glmfit))
## 
## Call:
## glm(formula = prec_obs ~ ., family = Gamma(link = "log"), data = X)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.79244  -0.24519  -0.05654   0.15930   0.83631  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.346e+00  1.343e+00  -2.492   0.0151 *  
## Long        -5.956e-02  1.218e-02  -4.889 6.36e-06 ***
## Lat         -2.404e-03  1.559e-02  -0.154   0.8779    
## Elev         9.517e-06  8.691e-05   0.110   0.9131    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.122277)
## 
##     Null deviance: 11.0656  on 72  degrees of freedom
## Residual deviance:  7.7545  on 69  degrees of freedom
## AIC: 483.61
## 
## Number of Fisher Scoring iterations: 6

Fit variogram and perform MCMC

geod = as.geodata(cbind(glmfit$residuals, lon, lat), data.col = 1)
vg = variog(geod,breaks = seq(50,10000,50))
## variog: computing omnidirectional variogram
sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
# Input number of samples
n.samples = 5000
# Coordinates of observations
coords = cbind(lon, lat)
# Prior Distributions Defined
priors = list("beta.flat", "phi.unif" = c(phi.lo,phi.hi),"sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(1,0.01))
# Starting values for MCMC resampling
starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1)
# Adjustment factor for MCMC sampling routine
tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1)

# Perform Monte Carlo Markov Chain Analysis
bat.fm = spLM(prec_obs ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = F, n.report = 50)
# Add some small amount to duplicated coordinates
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# Burn-In Samples
bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1, verbose = T)
## -------------------------------------------------
##      Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 3333, 2.97%
## Sampled: 199 of 3333, 5.97%
## Sampled: 299 of 3333, 8.97%
## Sampled: 399 of 3333, 11.97%
## Sampled: 499 of 3333, 14.97%
## Sampled: 599 of 3333, 17.97%
## Sampled: 699 of 3333, 20.97%
## Sampled: 799 of 3333, 23.97%
## Sampled: 899 of 3333, 26.97%
## Sampled: 999 of 3333, 29.97%
## Sampled: 1099 of 3333, 32.97%
## Sampled: 1199 of 3333, 35.97%
## Sampled: 1299 of 3333, 38.97%
## Sampled: 1399 of 3333, 41.97%
## Sampled: 1499 of 3333, 44.97%
## Sampled: 1599 of 3333, 47.97%
## Sampled: 1699 of 3333, 50.98%
## Sampled: 1799 of 3333, 53.98%
## Sampled: 1899 of 3333, 56.98%
## Sampled: 1999 of 3333, 59.98%
## Sampled: 2099 of 3333, 62.98%
## Sampled: 2199 of 3333, 65.98%
## Sampled: 2299 of 3333, 68.98%
## Sampled: 2399 of 3333, 71.98%
## Sampled: 2499 of 3333, 74.98%
## Sampled: 2599 of 3333, 77.98%
## Sampled: 2699 of 3333, 80.98%
## Sampled: 2799 of 3333, 83.98%
## Sampled: 2899 of 3333, 86.98%
## Sampled: 2999 of 3333, 89.98%
## Sampled: 3099 of 3333, 92.98%
## Sampled: 3199 of 3333, 95.98%
## Sampled: 3299 of 3333, 98.98%

Plot Posterior PDFs

beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)

See attached PDF for plot of beta

plot(theta)

print(summary(window(bat.fm$p.beta.recover.samples)))
## 
## Iterations = 1:3333
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 3333 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                   Mean       SD  Naive SE Time-series SE
## (Intercept) -1.174e+02 31.64771 5.482e-01      5.146e-01
## Long        -1.293e+00  0.28601 4.954e-03      4.496e-03
## Lat         -6.019e-02  0.35728 6.189e-03      6.383e-03
## Elev         4.471e-04  0.00202 3.498e-05      3.498e-05
## 
## 2. Quantiles for each variable:
## 
##                   2.5%        25%        50%        75%      97.5%
## (Intercept) -179.78916 -1.384e+02 -1.173e+02 -95.905302 -56.544084
## Long          -1.85249 -1.485e+00 -1.292e+00  -1.103976  -0.743526
## Lat           -0.76261 -3.011e-01 -6.689e-02   0.187306   0.632244
## Elev          -0.00354 -8.970e-04  4.184e-04   0.001794   0.004514

Plot precipitation posterior PDF

elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat) 

y = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))
yse = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))

for (i in 1:nrow(beta)) {
  zz = Krig(xs, glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2])
  y2 = predict(zz, x = xps, drop.Z = T)
  yse[,i] = predictSE(zz, x = xps, drop.Z = T)
  y1 = beta[i,1] + beta[i,2]*lon + beta[i,3]*lat + beta[i,4]*elev_hr
  y[,i] = y1 + y2
}
mean.y = apply(y, 1, FUN = mean)    # Take the mean and convert from % to fraction
mean.yse = apply(yse, 1, FUN = mean)  # Take the mean and convert from % to fraction

g1 = ggplot() + geom_histogram(aes(as.vector(y)), fill = "gray50", col = "black") +
  labs(title = "Histogram of Precipitation Posterior", x = "Predictions") +
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
show(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Spatially map precipitation estimate and standard error on high-resolution grid

pred = cbind(mean.y,mean.yse)
ypred = data.frame(pred)
names(ypred) = c("fit","se.fit")
Quilt_plotting(xps[,1],xps[,2],ypred,X[,1],X[,2],prec_obs)

Comments