Problem 12 of HW1. Develop a Bayesian Hierarchical Spatial Model for problem 7 of HW1. For this the first level is a Binomial distribution (i.e. logistic regression) You can use the steps/code from Andrew Verdin at http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/bayes/spatial-bayes/ And/or write your own code in RJAGS.
Data and commands for the problems below will be at http://civil.colorado.edu/~balajir/CVEN6833/HWs/HW-2-2018
Solution
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#data required:
day=11 # day of January 1997
#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW2.R"))
source("Lib_HW2.R")
# Read the topography data map
x0 = read.table("./data/colo_dem.dat")
names(x0)=c("lat","long","elev")
x1 = x0[,1:3]
## Read data as table (data frame)
test = read.table(paste("C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2/data/colo_pcp_daily_1997_01_",day,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("lat","long","elev","Pm")
test1=test
test[test == -999.999] <- NA
test = test[complete.cases(test), ]
#delete duplicate locations
dup_loc <- dup.coords(test[,1:2])
test=test[-as.numeric(dup_loc[1,]), ]
# convert the precipitation to binary data
Pm= test[,4]>0#Dependent Variable
X = test[,1:3]# Independent variable.
glmfit=GLM_fit(as.data.frame(cbind(X,Pm)),"binomial")
## [1] "Results of PRESS for bestfit GLM"
## glm_press
## logit 342.9802
## probit 343.2803
sprintf("Choosing the GLM which minimizes PRESS: %s family and %s link function.", glmfit$Call$FAMILY, glmfit$Call$LINK)
## [1] "Choosing the GLM which minimizes PRESS: binomial family and logit link function."
summary(glmfit)
##
## Call:
## glm(formula = Y ~ ., family = binomial(link = links[zc_bic]),
## data = xx)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1776 -1.1158 0.6256 0.8550 1.4837
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3313552 10.0578990 -0.132 0.894692
## lat 0.5192660 0.1194255 4.348 1.37e-05 ***
## long 0.1884544 0.0862533 2.185 0.028897 *
## elev 0.0007859 0.0002328 3.376 0.000735 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 363.80 on 287 degrees of freedom
## Residual deviance: 333.11 on 284 degrees of freedom
## AIC: 341.11
##
## Number of Fisher Scoring iterations: 4
geod = as.geodata(cbind(glmfit$residuals,X$long,X$lat),data.col=1)
## as.geodata: 2 replicated data locations found.
## Consider using jitterDupCoords() for jittering replicated locations.
## WARNING: there are data at coincident or very closed locations, some of the geoR's functions may not work.
## Use function dup.coords() to locate duplicated coordinates.
## Consider using jitterDupCoords() for jittering replicated locations
vg = variog(geod,breaks=seq(50,10000,50))
## variog: computing omnidirectional variogram
## variog: co-locatted data found, adding one bin at the origin
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
# Input number of samples
n.samples <- 1500
# coordinates of observations
coords <- cbind(X$long,X$lat)
# prior distributions defined (limited distributions available... see help(spLM)
priors <- list("beta.flat", "phi.unif"=c(phi.lo,phi.hi),
"sigma.sq.ig"=c(100, 100),"tau.sq.IG"=c(0.01, 0.001))
# starting values for MCMC resampling
starting <- list("phi"=phi.val, "sigma.sq"=500,"tau.sq"=0.01)
# adjustment factor for MCMC sampling routine
tuning <- list("phi"=1, "sigma.sq"=1, "tau.sq"=1) # WJR why set these to 1??
# Perform Monte Carlo Markov Chain Analysis
bat.fm <- spLM(Pm~., data=X, coords=as.matrix(X[,1:2]), priors=priors, tuning=tuning, starting=starting, cov.model = "exponential", n.samples = n.samples, verbose = FALSE, n.report = 50)
#add some small amount to duplicated coordinates
dup <- duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# burn-in samples
bat.fm <- spRecover(bat.fm, start=((1/3)*n.samples)+1, thin=1)
## -------------------------------------------------
## Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 1000, 9.90%
## Sampled: 199 of 1000, 19.90%
## Sampled: 299 of 1000, 29.90%
## Sampled: 399 of 1000, 39.90%
## Sampled: 499 of 1000, 49.90%
## Sampled: 599 of 1000, 59.90%
## Sampled: 699 of 1000, 69.90%
## Sampled: 799 of 1000, 79.90%
## Sampled: 899 of 1000, 89.90%
## Sampled: 999 of 1000, 99.90%
# plot posterior pdfs
beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples
plot(beta)
plot(theta)
summary(window(bat.fm$p.beta.recover.samples))
##
## Iterations = 1:1000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) 0.6825185 6.6031084 2.088e-01 2.088e-01
## lat 0.0813582 0.0853898 2.700e-03 2.525e-03
## long 0.0336542 0.0550883 1.742e-03 1.742e-03
## elev 0.0001581 0.0001013 3.202e-06 3.202e-06
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) -1.242e+01 -3.633e+00 0.6513587 5.3272268 1.395e+01
## lat -8.707e-02 2.893e-02 0.0769258 0.1426181 2.420e-01
## long -8.076e-02 -2.383e-03 0.0357933 0.0726966 1.401e-01
## elev -4.586e-05 8.871e-05 0.0001561 0.0002264 3.572e-04
n.samples2 = floor(n.samples*(2/3))
ylog = matrix(nrow=length(x1[,1]), ncol=n.samples2)
yselog = matrix(nrow=length(x1[,1]), ncol=n.samples2)
for (i in 1:n.samples2){
zz = Krig(X[,1:2],glmfit$residuals,rho=theta[i,1],theta=theta[i,3],m=1,sigma2=theta[i,2])
y2 = predict.Krig(zz,x=x1[,1:2],drop.Z=TRUE)
yselog[,i] = predictSE(zz, x=x1[,1:2], drop.Z=TRUE)
y1 = beta[i,1] + beta[i,2]*x1[,1]+beta[i,3]*x1[,2]+beta[i,4]*x1[,3]
ylog[,i] = y1+y2
}
y=logit2prob(ylog) #to obtain the real value
yse=logit2prob(yselog) #to obtain the real value
mean.y = apply(y, 1, FUN = mean) # take the mean and convert from percent to fraction
mean.yse = apply(yse, 1, FUN = mean) # take the mean and convert from percent to fraction
ggplot() +
geom_histogram(aes(as.vector(y)),fill="gray50",col='black') +
labs(title = "Histogram of Precipitation Posterior",
x = "Predictions")+
#theme_light()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
###################################################################
## II. Spatially map the model estimates and the standard error ####
###################################################################
Pred=cbind(mean.y,mean.yse)
ypred=data.frame(Pred)
names(ypred)=c("fit","se")
Quilt_plotting(x1[,2],x1[,1],ypred,X[,2],X[,1],Pm,type="binary")