1. Bayesian Hierarchical Spatial Model - Problem 9 from HW1
In the Bayesian models, plot the posterior histograms/PDFs of the parameters and spatial maps of the posterior mean and standard error. See Verdin et al. (2015) for tips.
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
graphics.off()
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")
# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Flag extreme values (> 1000) as missing and drop those rows.
# The indices are computed on data[,-1], so shift the column index by one before indexing data.
non_values <- which(data[,-1] > 1000, arr.ind = TRUE)
non_values[, 2] <- non_values[, 2] + 1
data[non_values] = NA
data = na.omit(data)
prec_obs = colMeans(data[,-1]) # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)
## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip = prec_obs # Mean winter precipitation at each station
xs = cbind(lon, lat)
glmfit = glm(prec_obs ~ ., data = X, family = Gamma(link = "log"))
print(summary(glmfit))
##
## Call:
## glm(formula = prec_obs ~ ., family = Gamma(link = "log"), data = X)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.79244 -0.24519 -0.05654 0.15930 0.83631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.346e+00 1.343e+00 -2.492 0.0151 *
## Long -5.956e-02 1.218e-02 -4.889 6.36e-06 ***
## Lat -2.404e-03 1.559e-02 -0.154 0.8779
## Elev 9.517e-06 8.691e-05 0.110 0.9131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.122277)
##
## Null deviance: 11.0656 on 72 degrees of freedom
## Residual deviance: 7.7545 on 69 degrees of freedom
## AIC: 483.61
##
## Number of Fisher Scoring iterations: 6
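As a quick optional check on the Gamma GLM trend, a deviance-based pseudo-R-squared summarizes how much of the station-to-station variation the covariates explain (a sketch using only quantities already stored in glmfit):
# Optional check (sketch): deviance-based pseudo-R^2 for the Gamma GLM trend
pseudo.r2 <- 1 - glmfit$deviance / glmfit$null.deviance
print(pseudo.r2) # about 0.30 given the deviances reported above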
geod = as.geodata(cbind(glmfit$residuals, lon, lat), data.col = 1)
vg = variog(geod,breaks = seq(50,10000,50))
## variog: computing omnidirectional variogram
sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
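Before fixing the prior bounds it can help to look at the empirical variogram itself; the sketch below uses the vg object computed above and marks the rough sill and range values that the bounds are scaled from.
# Optional check (sketch): inspect the empirical variogram used to set the priors
plot(vg, main = "Empirical semivariogram of GLM residuals")
abline(h = sigma.sq, lty = 2) # rough sill used to bound sigma.sq
abline(v = phi.val, lty = 3)  # rough range used to bound phi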
# Input number of samples
n.samples = 5000
# Coordinates of observations
coords = cbind(lon, lat)
# Prior Distributions Defined
priors = list("beta.flat", "phi.unif" = c(phi.lo,phi.hi),"sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(1,0.01))
# Starting values for MCMC resampling
starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1)
# Adjustment factor for MCMC sampling routine
tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1)
# Fit the Bayesian spatial regression via Markov chain Monte Carlo (MCMC) sampling
bat.fm = spLM(prec_obs ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = F, n.report = 50)
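With identity tuning variances it is worth confirming that the Metropolis step is mixing; a quick check (a sketch, relying on the acceptance element returned by spLM):
# Metropolis acceptance rate for the covariance parameters; values far outside
# roughly 20-50% suggest the tuning variances above should be adjusted
print(bat.fm$acceptance)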
# Jitter duplicated coordinates slightly so parameter recovery does not fail on a singular covariance matrix
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# Discard the first third of the chain as burn-in and recover beta and the spatial random effects w
bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1, verbose = T)
## -------------------------------------------------
## Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 3333, 2.97%
## ...
## Sampled: 3299 of 3333, 98.98%
beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)
plot(theta)
See the attached PDF for the posterior trace and density plots produced by plot(beta) and plot(theta).
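Beyond the trace plots, simple numerical convergence diagnostics are available because spRecover returns coda mcmc objects; a minimal sketch, assuming the coda package is available (it is loaded with spBayes):
# Convergence diagnostics (sketch) for the recovered parameters
library(coda)
print(effectiveSize(beta)) # effective sample sizes for the regression coefficients
print(geweke.diag(theta))  # Geweke z-scores for the covariance parameters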
print(summary(window(bat.fm$p.beta.recover.samples)))
##
## Iterations = 1:3333
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 3333
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) -1.174e+02 31.64771 5.482e-01 5.146e-01
## Long -1.293e+00 0.28601 4.954e-03 4.496e-03
## Lat -6.019e-02 0.35728 6.189e-03 6.383e-03
## Elev 4.471e-04 0.00202 3.498e-05 3.498e-05
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) -179.78916 -1.384e+02 -1.173e+02 -95.905302 -56.544084
## Long -1.85249 -1.485e+00 -1.292e+00 -1.103976 -0.743526
## Lat -0.76261 -3.011e-01 -6.689e-02 0.187306 0.632244
## Elev -0.00354 -8.970e-04 4.184e-04 0.001794 0.004514
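The same summary can be printed for the covariance parameters, which helps when judging whether the sigma.sq, tau.sq, and phi posteriors moved away from their variogram-based priors (a sketch mirroring the beta summary above):
# Posterior summary of the covariance parameters (sigma.sq, tau.sq, phi)
print(summary(window(bat.fm$p.theta.recover.samples)))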
elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat)
y = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))
yse = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))
for (i in 1:nrow(beta)) {
  # theta columns from spLM: 1 = sigma.sq (partial sill), 2 = tau.sq (nugget), 3 = phi (decay)
  zz = Krig(xs, glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2])
  # Krige the GLM residuals onto the elevation grid
  y2 = predict(zz, x = xps, drop.Z = T)
  yse[,i] = predictSE(zz, x = xps, drop.Z = T)
  # Evaluate the posterior trend surface at the grid locations
  y1 = beta[i,1] + beta[i,2]*lon + beta[i,3]*lat + beta[i,4]*elev_hr
  # Combine trend and kriged residuals for this posterior sample
  y[,i] = y1 + y2
}
mean.y = apply(y, 1, FUN = mean) # Posterior mean prediction at each grid cell
mean.yse = apply(yse, 1, FUN = mean) # Mean kriging standard error at each grid cell
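An alternative uncertainty measure to the mean kriging standard error is the spread of the sampled surfaces themselves; a short sketch on the y matrix built in the loop above:
# Posterior standard deviation across the sampled prediction surfaces (sketch)
post.sd.y = apply(y, 1, FUN = sd)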
g1 = ggplot() + geom_histogram(aes(as.vector(y)), fill = "gray50", col = "black") +
labs(title = "Histogram of Precipitation Posterior", x = "Predictions") +
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
show(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
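The problem statement also asks for the posterior PDF; a minimal density-plot counterpart to the histogram above (a sketch using the same pooled predictions):
# Posterior PDF of the pooled grid predictions (sketch)
g2 = ggplot() + geom_density(aes(as.vector(y)), fill = "gray80", col = "black") +
labs(title = "Posterior Density of Precipitation Predictions", x = "Predictions") +
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
show(g2)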
pred = cbind(mean.y,mean.yse)
ypred = data.frame(pred)
names(ypred) = c("fit","se.fit")
Quilt_plotting(xps[,1],xps[,2],ypred,X[,1],X[,2],prec_obs)
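For reference, the posterior mean and standard-error surfaces can also be mapped directly with fields::quilt.plot (a sketch, assuming the fields package is loaded by hw2_library.R):
# Posterior mean and standard-error maps via fields::quilt.plot (sketch)
par(mfrow = c(1, 2))
quilt.plot(xps[,1], xps[,2], mean.y, main = "Posterior Mean")
quilt.plot(xps[,1], xps[,2], mean.yse, main = "Posterior Standard Error")
par(mfrow = c(1, 1))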
Comments