3. Bayesian Hierarchical Spatial Model - Binomial
Develop a Bayesian Hierachical Spatial Model for problem 2. For this the first level is a Binomial distribution.
You can use the steps/code from Andrew Verdin at http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/bayes/spatial-bayes/\ And/or write your own code in RJAGS
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
graphics.off()
.pardefault <- par()
par(ask = F)
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")
# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1]) # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)
## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
## Compute the 75th percentile and categorize precipitation to a binary variable
q = quantile(prec_obs, c(0.75))
prec_bin = rep(1, length(prec_obs))
for (i in 1:length(prec_obs)) {
if (prec_obs[i] < q) {
prec_bin[i] = 0
}
}
glmfit = GLM_fit(prec_bin, X, family = "binomial")
## [1] "Results of AIC for bestfit GLM"
## glm_val
## logit 60.9066
print(summary(glmfit))
##
## Call:
## glm(formula = prec_obs ~ ., family = binomial(link = links[which.min(glm_val)]),
## data = xx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8316 -0.4823 -0.2814 0.8056 2.2909
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -54.4745 13.2884 -4.099 4.14e-05 ***
## Long -0.4906 0.1208 -4.061 4.90e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 83.708 on 72 degrees of freedom
## Residual deviance: 56.907 on 71 degrees of freedom
## AIC: 60.907
##
## Number of Fisher Scoring iterations: 5
geod = as.geodata(cbind(glmfit$residuals, X$Long, X$Lat), data.col = 1)
vg = variog(geod,breaks = seq(50,10000,50))
## variog: computing omnidirectional variogram
sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
# Input number of samples
n.samples = 1000
# Coordinates of observations
coords = cbind(X$Long, X$Lat)
# Prior Distributions Defined
priors = list("beta.flat", "phi.unif" = c(phi.lo,phi.hi),"sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(0.01,0.001))
# Starting values for MCMC resampling
starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1)
# Adjustment factor for MCMC sampling routine
tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1)
# Perform Monte Carlo Markov Chain Analysis
bat.fm = spLM(prec_bin ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = F, n.report = 50)
# Add some small amount to duplicated coordinates
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# Burn-In Samples
bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1)
## -------------------------------------------------
## Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 666, 14.86%
## Sampled: 199 of 666, 29.88%
## Sampled: 299 of 666, 44.89%
## Sampled: 399 of 666, 59.91%
## Sampled: 499 of 666, 74.92%
## Sampled: 599 of 666, 89.94%
beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)
See attached PDF for plot of beta
plot(theta)
print(summary(window(bat.fm$p.beta.recover.samples)))
##
## Iterations = 1:666
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 666
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) -7.491e+00 2.8292409 1.096e-01 1.096e-01
## Long -7.010e-02 0.0254592 9.865e-04 9.865e-04
## Lat 6.709e-03 0.0332811 1.290e-03 1.290e-03
## Elev -2.501e-05 0.0001761 6.824e-06 6.824e-06
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) -1.316e+01 -9.3676628 -7.615e+00 -5.614e+00 -1.8202449
## Long -1.187e-01 -0.0871249 -7.105e-02 -5.287e-02 -0.0183743
## Lat -5.801e-02 -0.0155504 6.424e-03 2.722e-02 0.0748346
## Elev -3.729e-04 -0.0001377 -2.997e-05 7.975e-05 0.0003364
elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat)
y = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))
yse = matrix(nrow = length(elev_grid[,1]), ncol = nrow(beta))
for (i in 1:nrow(beta)) {
zz = Krig(X[,1:2], glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2])
y2 = predict(zz, x = xps, drop.Z = T)
yse[,i] = predictSE(zz, x = xps, drop.Z = T)
y1 = beta[i,1] + beta[i,2]*lon + beta[i,3]*lat + beta[i,4]*elev_hr
y[,i] = y1 + y2
}
y=logit2prob(y) #to obtain the real value
yse=logit2prob(yse) #to obtain the real value
mean.y = apply(y, 1, FUN = mean) # Take the mean and convert from % to fraction
mean.yse = apply(yse, 1, FUN = mean) # Take the mean and convert from % to fraction
g1 = ggplot() + geom_histogram(aes(as.vector(y)), fill = "gray50", col = "black") +
labs(title = "Histogram of Precipitation Posterior", x = "Predictions") +
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
show(g1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pred = cbind(mean.y,mean.yse)
ypred = data.frame(pred)
names(ypred) = c("fit","se.fit")
par(.pardefault)
Quilt_plotting(xps[,1],xps[,2],ypred,X[,1],X[,2],prec_bin, type = "binary")
Comments