Bayesian Hierarchical Problem 11 of HW1. Repeat 9. Of HW1 with a Bayesian Hierarchical Spatial Model (see Verdin et al. 2015, for tips).

In the Bayesian models, plot the posterior historgram/PDF of all the parameters; spatial maps of posterior mean and standard error.

The first level can be a GLM with elevation and the second level the spatial model.

###########HW2,Problem 1 : Bayesian Hierarchical Spatial Model
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)

#data required: 
Month=1 # 1: Monthly precipitation of January; 2: Monthly precipitation of July

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW2.R"))
source("Lib_HW2.R")

Read in data

#  read the topography map

x0 = read.table("C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2/data/colo_dem.dat")
names(x0)=c("lat","long","elev")
x1 = x0[,1:3]
## Read data as table (data frame)
test = read.table(paste("C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2/data/colo_monthly_precip_0",Month,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("lat","long","elev","Pm")
test1=test
#filtering the -999 values
test[test == -999.999] <- NA
test[test== 0] <- 0.0001 
test = test[complete.cases(test), ] 
#delete duplicate locations
dup_loc <- dup.coords(test[,1:2]) 
test=test[-as.numeric(dup_loc[1,]), ] 
Pm = test[,4]     #Dependent Variable in mm
X = test[,1:3]# Independent variable.

Fit GLM

#######################################################################################
################## I Fit a 'best' GLM model with elevation....  ###################
#######################################################################################
# Find GLM model 
##Gamma 
glmfit= glm(Pm ~ ., data = X, family = Gamma(link="log"))
summary(glmfit)
## 
## Call:
## glm(formula = Pm ~ ., family = Gamma(link = "log"), data = X)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.5703  -0.4224  -0.0676   0.2507   1.6506  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.451e+01  1.995e+00  -7.274 1.61e-12 ***
## lat          5.194e-02  2.241e-02   2.317    0.021 *  
## long        -1.302e-01  1.675e-02  -7.774 5.43e-14 ***
## elev         8.797e-04  4.344e-05  20.250  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.2862417)
## 
##     Null deviance: 401.74  on 443  degrees of freedom
## Residual deviance: 156.12  on 440  degrees of freedom
## AIC: 3574.1
## 
## Number of Fisher Scoring iterations: 7

Fit variogram and perform MCMC

# obtain initial estimate of effective range (phi) 
geod = as.geodata(cbind(glmfit$residuals,X$long,X$lat),data.col=1)
## as.geodata: 3 replicated data locations found. 
##  Consider using jitterDupCoords() for jittering replicated locations. 
## WARNING: there are data at coincident or very closed locations, some of the geoR's functions may not work.
##  Use function dup.coords() to locate duplicated coordinates.
##  Consider using jitterDupCoords() for jittering replicated locations
vg = variog(geod,breaks=seq(50,10000,50))
## variog: computing omnidirectional variogram
## variog: co-locatted data found, adding one bin at the origin
sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u) 
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val

# Input number of samples
n.samples = 1500
# coordinates of observations
coords = cbind(X$long,X$lat) 
# prior distributions defined (limited distributions available... see help(spLM)
priors = list("beta.flat", "phi.unif"=c(phi.lo,phi.hi),
               "sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(1, 0.01))
# starting values for MCMC resampling
starting = list("phi"=phi.val, "sigma.sq"=sigma.sq, "tau.sq"=1)
# adjustment factor for MCMC sampling routine
tuning = list("phi"=1, "sigma.sq"=1,"tau.sq"=1) 

# Perform Monte Carlo Markov Chain Analysis
bat.fm = spLM(Pm~., data=X, coords=as.matrix(X[,1:2]), priors=priors, tuning=tuning, starting=starting, cov.model = "exponential", n.samples = n.samples, verbose = FALSE, n.report = 50)
#add some small amount to duplicated coordinates
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# burn-in samples
bat.fm = spRecover(bat.fm, start=((1/3)*n.samples)+1, thin=1, verbose=T)
## -------------------------------------------------
##      Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 1000, 9.90%
## Sampled: 199 of 1000, 19.90%
## Sampled: 299 of 1000, 29.90%
## Sampled: 399 of 1000, 39.90%
## Sampled: 499 of 1000, 49.90%
## Sampled: 599 of 1000, 59.90%
## Sampled: 699 of 1000, 69.90%
## Sampled: 799 of 1000, 79.90%
## Sampled: 899 of 1000, 89.90%
## Sampled: 999 of 1000, 99.90%

Plot posterior pdfs

# plot posterior pdfs
beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)

plot(theta)

summary(window(bat.fm$p.beta.recover.samples))
## 
## Iterations = 1:1000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 1000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                  Mean        SD  Naive SE Time-series SE
## (Intercept) -49.72617 1.550e+02 4.903e+00      4.664e+00
## lat           3.05298 1.909e+00 6.036e-02      6.036e-02
## long          1.24111 1.328e+00 4.198e-02      4.198e-02
## elev          0.04443 2.657e-03 8.402e-05      8.402e-05
## 
## 2. Quantiles for each variable:
## 
##                   2.5%        25%       50%     75%     97.5%
## (Intercept) -357.66594 -153.26870 -46.45674 45.7950 267.13406
## lat           -0.72962    1.76865   3.07323  4.2733   6.88521
## long          -1.50534    0.38761   1.26488  2.1320   3.89319
## elev           0.03882    0.04263   0.04456  0.0462   0.04956

Plot precipitation posterior pdf, predicted precipitation, and standard error

###################################################################
## II. Spatially map the model estimates and the standard error ####
###################################################################

y = matrix(nrow=length(x1[,1]), ncol=nrow(beta))
yse = matrix(nrow=length(x1[,1]), ncol=nrow(beta))


for (i in 1:nrow(beta)){
  zz = Krig(X[,1:2],glmfit$residuals,rho=theta[i,1],theta=theta[i,3],m=1,sigma2=theta[i,2])
  y2 = predict.Krig(zz,x=x1[,1:2],drop.Z=TRUE) 
  yse[,i] = predictSE(zz, x=x1[,1:2], drop.Z=TRUE) 
  y1 = beta[i,1] + beta[i,2]*x1[,1]+beta[i,3]*x1[,2]+beta[i,4]*x1[,3]
  y[,i] = y1+y2
}
mean.y = apply(y, 1, FUN = mean) # take the mean and convert from percent to fraction
mean.yse = apply(yse, 1, FUN = mean) # take the mean and convert from percent to fraction

ggplot() +
 geom_histogram(aes(as.vector(y)),fill="gray50",col='black') +
  labs(title = "Histogram of Precipitation Posterior",
       x = "Predictions")+
  #theme_light()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

###################################################################
## II. Spatially map the model estimates and the standard error ####
###################################################################
Pred=cbind(mean.y,mean.yse)
ypred=data.frame(Pred)
names(ypred)=c("fit","se")
#Quilt_plotting(x1[,2],x1[,1],ypred)
Quilt_plotting(x1[,2],x1[,1],ypred,X[,2],X[,1],Pm)

###Final comments###