[In the Bayesian models, plot the posterior histogram/PDF of the parameters and spatial maps of the posterior mean and standard error.]
#### Clear Memory and Set Options
rm(list = ls())    # Clear the workspace
options(warn = -1) # Suppress warnings for cleaner output
save <- FALSE      # Change to TRUE to save the plots and summaries
# Set working directory (Modify if necessary)
script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(script_dir)
source("HW2_Library.R")
climatol_data <- read.table("data/climatol_ann.txt", header=FALSE)
names(climatol_data) <- c("Longitude", "Latitude", "Elevation", "AnnualPrecipitation")
distance_data <- read.table("data/2b-climatol_ann_with_distances.txt", header=TRUE)
#### Rename columns in distance_data to match climatol_data
names(distance_data) <- c("Longitude", "Latitude", "Elevation", "Dist_ArabianSea", "Dist_BayOfBengal")
#### Merge the two datasets
merged_data <- merge(climatol_data, distance_data, by=c("Longitude", "Latitude", "Elevation"))
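# Sanity check (a hedged assumption that coordinates match exactly across the
# two files): the merge should preserve every climatology station.
stopifnot(nrow(merged_data) == nrow(climatol_data))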
prec_obs <- merged_data$AnnualPrecipitation # Extract precipitation column
#### Create Design Matrix
X <- merged_data[, c("Longitude", "Latitude", "Elevation", "Dist_ArabianSea", "Dist_BayOfBengal")]
names(X) <- c("Long", "Lat", "Elev", "DistArabian", "DistBay")
lon <- X[,1]
lat <- X[,2]
ele <- X[,3]
dista <- X[,4]
distb <- X[,5]
precip <- prec_obs/10 # Convert mm to cm (note: the models below are fit to prec_obs in mm)
xs = cbind(lon, lat)
glmfit = glm(prec_obs ~ ., data = X, family = Gamma(link = "log"))
if (save) {
output_file <- "1-glmfit_summary.txt"
sink(output_file)
}
print(summary(glmfit))
##
## Call:
## glm(formula = prec_obs ~ ., family = Gamma(link = "log"), data = X)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.431e+00 5.134e+00 -1.837 0.067422 .
## Long 2.175e-01 6.490e-02 3.351 0.000937 ***
## Lat 8.008e-03 2.726e-02 0.294 0.769193
## Elev -1.032e-04 9.631e-05 -1.072 0.284810
## DistArabian -1.578e-01 6.420e-02 -2.459 0.014650 *
## DistBay 5.025e-02 3.161e-02 1.590 0.113210
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.2055282)
##
## Null deviance: 61.286 on 245 degrees of freedom
## Residual deviance: 36.151 on 240 degrees of freedom
## AIC: 3646.9
##
## Number of Fisher Scoring iterations: 14
if (save) {
sink()
}
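# A quick goodness-of-fit figure for the GLM: the fraction of null deviance
# explained (about 1 - 36.151/61.286 = 0.41 from the summary above).
cat("Deviance explained:", round(1 - glmfit$deviance/glmfit$null.deviance, 3), "\n")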
geod = as.geodata(cbind(glmfit$residuals, lon, lat), data.col = 1) # GLM (working) residuals as the spatial variable
vg = variog(geod, breaks = seq(50, 10000, 50))
## variog: computing omnidirectional variogram
# Center the prior ranges on the empirical variogram (vg$v: semivariance, vg$u: distance)
sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
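# Optional sketch: overlay the prior bounds on the empirical variogram to
# check that they bracket the data sensibly.
plot(vg, main = "Empirical variogram of GLM residuals")
abline(h = c(sigma.sq.lo, sigma.sq, sigma.sq.hi), lty = c(2, 1, 2), col = "gray40")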
# Number of MCMC samples
n.samples = 1500
# Coordinates of observations
coords = cbind(lon, lat)
# Prior distributions (note: the *.IG priors take shape/scale hyperparameters,
# so supplying the variogram-based values directly is a rough heuristic)
priors = list("beta.Flat", "phi.Unif" = c(phi.lo, phi.hi), "sigma.sq.IG" = c(sigma.sq.lo, sigma.sq.hi), "tau.sq.IG" = c(1, 0.01))
# Starting values for the MCMC sampler
starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1)
# Metropolis proposal (tuning) variances
tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1)
# Fit the Bayesian spatial linear model via Markov chain Monte Carlo (MCMC)
bat.fm = spLM(prec_obs ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = FALSE, n.report = 50)
# Jitter duplicated coordinates slightly so the covariance matrix in spRecover is nonsingular
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
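# Report how many coordinate pairs were perturbed (a minimal audit of the fix)
cat("Jittered", sum(dup), "duplicated coordinate pairs\n")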
# Discard the first third of the chain as burn-in and recover beta and the spatial effects w
bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1, verbose = TRUE)
## -------------------------------------------------
## Recovering beta and w
## -------------------------------------------------
## Sampled: 99 of 1000, 9.90%
## Sampled: 199 of 1000, 19.90%
## Sampled: 299 of 1000, 29.90%
## Sampled: 399 of 1000, 39.90%
## Sampled: 499 of 1000, 49.90%
## Sampled: 599 of 1000, 59.90%
## Sampled: 699 of 1000, 69.90%
## Sampled: 799 of 1000, 79.90%
## Sampled: 899 of 1000, 89.90%
## Sampled: 999 of 1000, 99.90%
beta = bat.fm$p.beta.recover.samples   # Posterior draws of the regression coefficients
theta = bat.fm$p.theta.recover.samples # Posterior draws of (sigma.sq, tau.sq, phi)
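# spLM returns coda mcmc objects, so standard convergence diagnostics apply;
# effective sample sizes far below the chain length would suggest retuning.
library(coda)
print(effectiveSize(beta))
print(effectiveSize(theta))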
if (save) pdf("1-posterior_plots.pdf")
# Trace and density plots for the regression coefficients
par(mfrow=c(2,2), mar=c(4,4,2,1))
for (i in 1:ncol(beta)) {
dens <- density(beta[,i])
plot.ts(beta[,i], main=paste("Trace of", colnames(beta)[i]), ylab="Value", xlab="Iterations")
plot(dens, main=paste("Density of", colnames(beta)[i]), xlab=paste("N =", nrow(beta), "Bandwidth =", round(dens$bw, 2)))
}
# Trace and density plots for the covariance parameters
par(mfrow=c(3,2), mar=c(4,4,2,1))
for (i in 1:ncol(theta)) {
dens <- density(theta[,i])
plot.ts(theta[,i], main=paste("Trace of", colnames(theta)[i]), ylab="Value", xlab="Iterations")
plot(dens, main=paste("Density of", colnames(theta)[i]), xlab=paste("N =", nrow(theta), "Bandwidth =", round(dens$bw, 2)))
}
if (save) dev.off()
summary(beta)
##
## Iterations = 1:1000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## (Intercept) -1.474e+04 5855.7716 1.852e+02 1.852e+02
## Long 2.134e+02 74.0773 2.343e+00 2.343e+00
## Lat -2.640e+01 30.8640 9.760e-01 7.978e-01
## Elev -1.318e-01 0.1115 3.526e-03 3.252e-03
## DistArabian -1.189e+02 72.4791 2.292e+00 2.055e+00
## DistBay 8.476e+01 37.3607 1.181e+00 1.181e+00
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## (Intercept) -2.623e+04 -1.860e+04 -1.453e+04 -1.086e+04 -3.137e+03
## Long 6.646e+01 1.648e+02 2.109e+02 2.627e+02 3.588e+02
## Lat -8.530e+01 -4.656e+01 -2.616e+01 -5.374e+00 3.352e+01
## Elev -3.412e-01 -2.104e-01 -1.342e-01 -5.424e-02 8.596e-02
## DistArabian -2.618e+02 -1.684e+02 -1.184e+02 -6.977e+01 2.473e+01
## DistBay 9.971e+00 6.105e+01 8.561e+01 1.106e+02 1.565e+02
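# A compact complement to the density plots: the posterior probability that
# each coefficient is positive (values near 0 or 1 indicate a clear sign).
print(colMeans(as.matrix(beta) > 0))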
#### Precipitation posterior PDF on the high-resolution grid
library(parallel)
# Read the high-resolution elevation grid
grid_data <- read.table("data/india-grid-topo.txt", header=FALSE)
colnames(grid_data) <- c("Longitude", "Latitude", "Elevation")
lon_hr <- grid_data[,1]
lat_hr <- grid_data[,2]
elev_hr <- grid_data[,3]
xps <- cbind(lon_hr, lat_hr)
n_beta <- nrow(beta)
y <- matrix(NA, nrow = nrow(grid_data), ncol = n_beta)
yse <- matrix(NA, nrow = nrow(grid_data), ncol = n_beta)
# Set up parallel cluster
n.cores <- max(1, detectCores() - 2)
cl <- makeCluster(n.cores)
# Load fields package and export variables to workers
invisible(clusterEvalQ(cl, library(fields)))
clusterExport(cl, varlist = c("xs", "xps", "theta", "beta", "glmfit", "lon_hr", "lat_hr", "elev_hr", "n_beta"), envir = environment())
# Run parallel Kriging and prediction. theta columns are (sigma.sq, tau.sq, phi);
# note fields' exponential covariance uses exp(-d/theta) while spLM's phi is a
# decay rate (exp(-phi*d)), and theta[i,3] is passed through unchanged here.
results <- parLapply(cl, 1:n_beta, function(i) {
# Kriging of the GLM residuals under the i-th posterior covariance draw
zz <- Krig(xs, glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2])
y2 <- predict(zz, x = xps, drop.Z = TRUE)      # Kriged residual surface
yse_i <- predictSE(zz, x = xps, drop.Z = TRUE) # Kriging standard error
# Trend from the i-th posterior beta draw; the distance covariates are not
# available on this grid, so only the intercept, longitude, latitude, and
# elevation terms enter the trend.
y1 <- beta[i,1] + beta[i,2]*lon_hr + beta[i,3]*lat_hr + beta[i,4]*elev_hr
y_i <- y1 + y2
list(y = y_i, yse = yse_i)
})
# Stop cluster
stopCluster(cl)
# Combine results
for (i in 1:n_beta) {
y[,i] <- results[[i]]$y
yse[,i] <- results[[i]]$yse
}
mean.y <- apply(y, 1, FUN = mean)     # Posterior mean prediction at each grid point
mean.yse <- apply(yse, 1, FUN = mean) # Posterior mean standard error at each grid point
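# Per-cell credible bounds are available from the same sample matrix; a sketch
# of a 95% equal-tailed interval for the predictions:
lo.y <- apply(y, 1, quantile, probs = 0.025)
hi.y <- apply(y, 1, quantile, probs = 0.975)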
# Plot histogram of posterior predictions
g1 <- ggplot() + geom_histogram(aes(x = as.vector(y)), fill = "gray50", col = "black") +
labs(title = "1-Histogram of Precipitation Posterior", x = "Predictions") +
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
print(g1)
# Read the Rajeevan grid defining India's area
rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE)
colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation")
# Perform an inner join to retain only points within India's defined region
india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation"))
# Use only the rows in 'grid_data' that match the India mask
matched_idx <- match(
paste(india_grid$Longitude, india_grid$Latitude, india_grid$Elevation),
paste(grid_data$Longitude, grid_data$Latitude, grid_data$Elevation)
)
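# Guard against silent misalignment: every India-mask point should have a
# counterpart in the high-resolution grid.
stopifnot(!anyNA(matched_idx))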
# Attach the posterior mean and standard error from the parallel Kriging
india_grid$PredictedPrecip <- mean.y[matched_idx] * 10 # Scale by 10 (cm to mm, per the unit convention above)
india_grid$StandardError <- mean.yse[matched_idx] * 10
# Plot estimates and standard error on high-resolution grid
#### Define Color Palettes
prec_palette <- hcl.colors(10, palette = "Zissou 1")
se_palette <- hcl.colors(10, palette = "Inferno")
#### Plot 1: Predicted Precipitation Map
g1 <- ggplot(india_grid) +
geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) +
scale_fill_gradientn(name = "Precipitation (mm)", colours = prec_palette, na.value = "white") +
theme_bw() +
coord_fixed() +
ggtitle("Spatial Distribution of Predicted Precipitation")
#### Plot 2: Standard Error Map
g2 <- ggplot(india_grid) +
geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) +
scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") +
theme_bw() +
coord_fixed() +
ggtitle("Spatial Distribution of Prediction Standard Error")
#### Save Plots
if (save) {
ggsave("1-Predicted_Precipitation_Map.pdf", g1, width=8, height=6)
ggsave("1-Standard_Error_Map.pdf", g2, width=8, height=6)
}
#### Display Plots
print(g1)
print(g2)