---
title: "HW2_Prob1"
author: "Haiying Peng"
date: "2025-03-10"
output: html_document
---
#### 1.Problem 7 from HW1 with a Bayesian Hierarchical Spatial Model
[In the Bayesian models, plot the posterior historgram/PDF of the parameters; spatial 
maps of posterior mean and standard error]

```{r, message=FALSE, warning=FALSE}
#### Clear Memory and Set Options
rm(list=ls())
options(warn=-1)
save <- FALSE  # Change to TRUE if you want to save the plot

# Set working directory (Modify if necessary)
script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(script_dir)

source("HW2_Library.R")

```
##### Read data
```{r, echo=TRUE}

climatol_data <- read.table("data/climatol_ann.txt", header=FALSE)
names(climatol_data) <- c("Longitude", "Latitude", "Elevation", "AnnualPrecipitation")

distance_data <- read.table("data/2b-climatol_ann_with_distances.txt", header=TRUE)

#### Rename columns in distance_data to match climatol_data
names(distance_data) <- c("Longitude", "Latitude", "Elevation", "Dist_ArabianSea", "Dist_BayOfBengal")

#### Merge the two datasets
merged_data <- merge(climatol_data, distance_data, by=c("Longitude", "Latitude", "Elevation"))
prec_obs <- merged_data$AnnualPrecipitation  # Extract precipitation column

#### Create Design Matrix
X <- merged_data[, c("Longitude", "Latitude", "Elevation", "Dist_ArabianSea", "Dist_BayOfBengal")]
names(X) <- c("Long", "Lat", "Elev", "DistArabian", "DistBay")

lon <- X[,1]
lat <- X[,2]
ele <- X[,3]
dista <- X[,4]
distb <- X[,5]

precip <- prec_obs/10 #convert to cm
xs = cbind(lon, lat)
```

#### Fit GLM
```{r, echo=TRUE}
glmfit = glm(prec_obs ~ ., data = X, family = Gamma(link = "log"))

if (save) {
output_file <- "1-glmfit_summary.txt"
sink(output_file)
}

print(summary(glmfit))

if (save) {
sink()
}

```
#### Fit variogram and perform MCMC

```{r, echo=TRUE}
geod = as.geodata(cbind(glmfit$residuals, lon, lat), data.col = 1)
vg = variog(geod,breaks = seq(50,10000,50))

sigma.sq = median(vg$v)
sigma.sq.lo = 0.25*sigma.sq
sigma.sq.hi = 1.75*sigma.sq
phi.val = median(vg$u)
phi.lo = 0.25*phi.val
phi.hi = 1.75*phi.val
# Input number of samples
n.samples = 1500
# Coordinates of observations
coords = cbind(lon, lat)
# Prior Distributions Defined
priors = list("beta.flat", "phi.unif" = c(phi.lo,phi.hi),"sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(1,0.01))
# Starting values for MCMC resampling
starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1)
# Adjustment factor for MCMC sampling routine
tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1)

# Perform Monte Carlo Markov Chain Analysis
bat.fm = spLM(prec_obs ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = F, n.report = 50)
# Add some small amount to duplicated coordinates
dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3
# Burn-In Samples
bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1, verbose = T)

```
#### Plot Posterior PDFs
```{r, echo=TRUE}
beta = bat.fm$p.beta.recover.samples
theta = bat.fm$p.theta.recover.samples


if (save) pdf("1-posterior_plots.pdf")

# Adjust plotting layout
par(mfrow=c(2,2), mar=c(4,4,2,1))
for (i in 1:6) { 
    dens <- density(beta[,i], adjust=1)
    plot.ts(beta[,i], main=paste("Trace of", colnames(beta)[i]), ylab="Value", xlab="Iterations")
    plot(density(beta[,i]), main=paste("Density of", colnames(beta)[i]), xlab=paste("N=", length(beta[,i]), "Bandwidth=", round(dens$bw, 2)))
}

par(mfrow=c(3,2), mar=c(4,4,2,1))
for (i in 1:3) { 
    dens <- density(theta[,i], adjust=1)
    plot.ts(theta[,i], main=paste("Trace of", colnames(theta)[i]), ylab="Value", xlab="Iterations")
    plot(density(theta[,i]), main=paste("Density of", colnames(theta)[i]), xlab=paste("N=", length(theta[,i]), "Bandwidth=", round(dens$bw, 2)))
}
if (save) dev.off()
summary(window(bat.fm$p.beta.recover.samples))
```

#### Plot precipitation posterior pdf, predicted precipitation, and standard error
```{r, message=FALSE, warning=FALSE, echo=TRUE}
##precipitation posterior pdf
library(parallel)


  # Read the high-resolution elevation grid
  grid_data <- read.table("data/india-grid-topo.txt", header=FALSE)
  colnames(grid_data) <- c("Longitude", "Latitude", "Elevation")
  

  
  lon_hr <- grid_data[,1]
  lat_hr <- grid_data[,2]
  elev_hr <- grid_data[,3]
  xps <- cbind(lon_hr, lat_hr)
  
  n_beta <- nrow(beta)
  y <- matrix(NA, nrow = nrow(grid_data), ncol = n_beta)
  yse <- matrix(NA, nrow = nrow(grid_data), ncol = n_beta)
  
  # Set up parallel cluster
  n.cores <- max(1, detectCores() - 2)
  cl <- makeCluster(n.cores)
  # Load fields package and export variables to workers
  invisible(clusterEvalQ(cl, library(fields)))
  clusterExport(cl, varlist = c("xs", "xps", "theta", "beta", "glmfit", "lon_hr", "lat_hr", "elev_hr", "n_beta"), envir = environment())

# Run parallel Kriging and prediction
results <- parLapply(cl, 1:n_beta, function(i) {
  
  zz <- Krig(xs, glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2])
  y2 <- predict(zz, x = xps, drop.Z = TRUE)
  yse_i <- predictSE(zz, x = xps, drop.Z = TRUE)
  y1 <- beta[i,1] + beta[i,2]*lon_hr + beta[i,3]*lat_hr + beta[i,4]*elev_hr
  y_i <- y1 + y2
  list(y = y_i, yse = yse_i)
})
  
  # Stop cluster
stopCluster(cl)

# Combine results
for (i in 1:n_beta) {
  y[,i] <- results[[i]]$y
  yse[,i] <- results[[i]]$yse
}


mean.y <- apply(y, 1, FUN = mean)    # Take the mean and convert from % to fraction
mean.yse <- apply(yse, 1, FUN = mean)  # Take the mean and convert from % to fraction

# Plot histogram of posterior predictions
  g1 <- ggplot() + geom_histogram(aes(as.vector(y)), fill = "gray50", col = "black") +
  labs(title = "1-Histogram of Precipitation Posterior", x = "Predictions") +
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
  print(g1)
```


#### Spatially map the model estimates and standard error on the high-resolution grid
```{r, echo=TRUE, results='show'}

# Read the Rajeevan grid defining India's area
rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE)
colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation")


# Perform an inner join to retain only points within India's defined region
india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation"))

# Use only the rows in 'grid_data' that match the India mask
matched_idx <- match(
  paste(india_grid$Longitude, india_grid$Latitude, india_grid$Elevation),
  paste(grid_data$Longitude, grid_data$Latitude, grid_data$Elevation)
)

# Add predicted results from the parallel Kriging
india_grid$PredictedPrecip <- mean.y[matched_idx] * 10   # Convert to mm
india_grid$StandardError   <- mean.yse[matched_idx] * 10  # Convert to mm


# Plot estimates and standard error on high-resolution grid
#### Define Color Palettes
  prec_palette <- hcl.colors(10, palette = "Zissou 1")  
  se_palette <- hcl.colors(10, palette = "Inferno")  

  #### Plot 1: Predicted Precipitation Map
  g1 <- ggplot(india_grid) +
    geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) +  # Use "Long" and "Lat"
    scale_fill_gradientn(name = "Precipitation (mm)", colours = prec_palette, na.value = "white") +
    theme_bw() +
    coord_fixed() +
    ggtitle("Spatial Distribution of Predicted Precipitation")

  #### Plot 2: Standard Error Map
  g2 <- ggplot(india_grid) +
    geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) +  # Use "Long" and "Lat"
    scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") +
    theme_bw() +
    coord_fixed() +
    ggtitle("Spatial Distribution of Prediction Standard Error")

  #### Save Plots
  if (save) {
    ggsave("1-Predicted_Precipitation_Map.pdf", g1, width=8, height=6)
    ggsave("1-Standard_Error_Map.pdf", g2, width=8, height=6)
  }

  #### Display Plots
  print(g1)
  print(g2)

```