--- title: "HW2_Prob1" author: "Haiying Peng" date: "2025-03-10" output: html_document --- #### 1.Problem 7 from HW1 with a Bayesian Hierarchical Spatial Model [In the Bayesian models, plot the posterior historgram/PDF of the parameters; spatial maps of posterior mean and standard error] ```{r, message=FALSE, warning=FALSE} #### Clear Memory and Set Options rm(list=ls()) options(warn=-1) save <- FALSE # Change to TRUE if you want to save the plot # Set working directory (Modify if necessary) script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) setwd(script_dir) source("HW2_Library.R") ``` ##### Read data ```{r, echo=TRUE} climatol_data <- read.table("data/climatol_ann.txt", header=FALSE) names(climatol_data) <- c("Longitude", "Latitude", "Elevation", "AnnualPrecipitation") distance_data <- read.table("data/2b-climatol_ann_with_distances.txt", header=TRUE) #### Rename columns in distance_data to match climatol_data names(distance_data) <- c("Longitude", "Latitude", "Elevation", "Dist_ArabianSea", "Dist_BayOfBengal") #### Merge the two datasets merged_data <- merge(climatol_data, distance_data, by=c("Longitude", "Latitude", "Elevation")) prec_obs <- merged_data$AnnualPrecipitation # Extract precipitation column #### Create Design Matrix X <- merged_data[, c("Longitude", "Latitude", "Elevation", "Dist_ArabianSea", "Dist_BayOfBengal")] names(X) <- c("Long", "Lat", "Elev", "DistArabian", "DistBay") lon <- X[,1] lat <- X[,2] ele <- X[,3] dista <- X[,4] distb <- X[,5] precip <- prec_obs/10 #convert to cm xs = cbind(lon, lat) ``` #### Fit GLM ```{r, echo=TRUE} glmfit = glm(prec_obs ~ ., data = X, family = Gamma(link = "log")) if (save) { output_file <- "1-glmfit_summary.txt" sink(output_file) } print(summary(glmfit)) if (save) { sink() } ``` #### Fit variogram and perform MCMC ```{r, echo=TRUE} geod = as.geodata(cbind(glmfit$residuals, lon, lat), data.col = 1) vg = variog(geod,breaks = seq(50,10000,50)) sigma.sq = median(vg$v) sigma.sq.lo = 0.25*sigma.sq sigma.sq.hi = 1.75*sigma.sq phi.val = median(vg$u) phi.lo = 0.25*phi.val phi.hi = 1.75*phi.val # Input number of samples n.samples = 1500 # Coordinates of observations coords = cbind(lon, lat) # Prior Distributions Defined priors = list("beta.flat", "phi.unif" = c(phi.lo,phi.hi),"sigma.sq.ig"=c(sigma.sq.lo,sigma.sq.hi),"tau.sq.IG"=c(1,0.01)) # Starting values for MCMC resampling starting = list("phi" = phi.val, "sigma.sq" = sigma.sq, "tau.sq" = 1) # Adjustment factor for MCMC sampling routine tuning = list("phi" = 1, "sigma.sq" = 1, "tau.sq" = 1) # Perform Monte Carlo Markov Chain Analysis bat.fm = spLM(prec_obs ~ ., data = X, coords = as.matrix(X[,1:2]), priors = priors, tuning = tuning, starting = starting, cov.model = "exponential", n.samples = n.samples, verbose = F, n.report = 50) # Add some small amount to duplicated coordinates dup = duplicated(bat.fm$coords); bat.fm$coords[dup] <- bat.fm$coords[dup] + 1e-3 # Burn-In Samples bat.fm = spRecover(bat.fm, start = ((1/3)*n.samples)+1, thin = 1, verbose = T) ``` #### Plot Posterior PDFs ```{r, echo=TRUE} beta = bat.fm$p.beta.recover.samples theta = bat.fm$p.theta.recover.samples if (save) pdf("1-posterior_plots.pdf") # Adjust plotting layout par(mfrow=c(2,2), mar=c(4,4,2,1)) for (i in 1:6) { dens <- density(beta[,i], adjust=1) plot.ts(beta[,i], main=paste("Trace of", colnames(beta)[i]), ylab="Value", xlab="Iterations") plot(density(beta[,i]), main=paste("Density of", colnames(beta)[i]), xlab=paste("N=", length(beta[,i]), "Bandwidth=", round(dens$bw, 2))) } par(mfrow=c(3,2), mar=c(4,4,2,1)) for (i in 1:3) { dens <- density(theta[,i], adjust=1) plot.ts(theta[,i], main=paste("Trace of", colnames(theta)[i]), ylab="Value", xlab="Iterations") plot(density(theta[,i]), main=paste("Density of", colnames(theta)[i]), xlab=paste("N=", length(theta[,i]), "Bandwidth=", round(dens$bw, 2))) } if (save) dev.off() summary(window(bat.fm$p.beta.recover.samples)) ``` #### Plot precipitation posterior pdf, predicted precipitation, and standard error ```{r, message=FALSE, warning=FALSE, echo=TRUE} ##precipitation posterior pdf library(parallel) # Read the high-resolution elevation grid grid_data <- read.table("data/india-grid-topo.txt", header=FALSE) colnames(grid_data) <- c("Longitude", "Latitude", "Elevation") lon_hr <- grid_data[,1] lat_hr <- grid_data[,2] elev_hr <- grid_data[,3] xps <- cbind(lon_hr, lat_hr) n_beta <- nrow(beta) y <- matrix(NA, nrow = nrow(grid_data), ncol = n_beta) yse <- matrix(NA, nrow = nrow(grid_data), ncol = n_beta) # Set up parallel cluster n.cores <- max(1, detectCores() - 2) cl <- makeCluster(n.cores) # Load fields package and export variables to workers invisible(clusterEvalQ(cl, library(fields))) clusterExport(cl, varlist = c("xs", "xps", "theta", "beta", "glmfit", "lon_hr", "lat_hr", "elev_hr", "n_beta"), envir = environment()) # Run parallel Kriging and prediction results <- parLapply(cl, 1:n_beta, function(i) { zz <- Krig(xs, glmfit$residuals, sigma = theta[i,1], theta = theta[i,3], m = 1, tau2 = theta[i,2]) y2 <- predict(zz, x = xps, drop.Z = TRUE) yse_i <- predictSE(zz, x = xps, drop.Z = TRUE) y1 <- beta[i,1] + beta[i,2]*lon_hr + beta[i,3]*lat_hr + beta[i,4]*elev_hr y_i <- y1 + y2 list(y = y_i, yse = yse_i) }) # Stop cluster stopCluster(cl) # Combine results for (i in 1:n_beta) { y[,i] <- results[[i]]$y yse[,i] <- results[[i]]$yse } mean.y <- apply(y, 1, FUN = mean) # Take the mean and convert from % to fraction mean.yse <- apply(yse, 1, FUN = mean) # Take the mean and convert from % to fraction # Plot histogram of posterior predictions g1 <- ggplot() + geom_histogram(aes(as.vector(y)), fill = "gray50", col = "black") + labs(title = "1-Histogram of Precipitation Posterior", x = "Predictions") + theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5)) print(g1) ``` #### Spatially map the model estimates and standard error on the high-resolution grid ```{r, echo=TRUE, results='show'} # Read the Rajeevan grid defining India's area rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE) colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation") # Perform an inner join to retain only points within India's defined region india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation")) # Use only the rows in 'grid_data' that match the India mask matched_idx <- match( paste(india_grid$Longitude, india_grid$Latitude, india_grid$Elevation), paste(grid_data$Longitude, grid_data$Latitude, grid_data$Elevation) ) # Add predicted results from the parallel Kriging india_grid$PredictedPrecip <- mean.y[matched_idx] * 10 # Convert to mm india_grid$StandardError <- mean.yse[matched_idx] * 10 # Convert to mm # Plot estimates and standard error on high-resolution grid #### Define Color Palettes prec_palette <- hcl.colors(10, palette = "Zissou 1") se_palette <- hcl.colors(10, palette = "Inferno") #### Plot 1: Predicted Precipitation Map g1 <- ggplot(india_grid) + geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) + # Use "Long" and "Lat" scale_fill_gradientn(name = "Precipitation (mm)", colours = prec_palette, na.value = "white") + theme_bw() + coord_fixed() + ggtitle("Spatial Distribution of Predicted Precipitation") #### Plot 2: Standard Error Map g2 <- ggplot(india_grid) + geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) + # Use "Long" and "Lat" scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") + theme_bw() + coord_fixed() + ggtitle("Spatial Distribution of Prediction Standard Error") #### Save Plots if (save) { ggsave("1-Predicted_Precipitation_Map.pdf", g1, width=8, height=6) ggsave("1-Standard_Error_Map.pdf", g2, width=8, height=6) } #### Display Plots print(g1) print(g2) ```