2.Use a precipitation threshold of 150 cm across all the stations and categorize the precipitation at each station to a binary variable – 0 if it is less than the threshold, 1, otherwise. Use the suite of predictors from HW1.

  1. Fit a ‘best’ GLM (i.e. logistic regression) with the appropriate link function using one of the objective functions. Test the model goodness using ANOVA
  2. Estimate the function on the grid and plot the surface. Also plot the standard error.
  3. Repeat (i) and (ii) with Local GLM
#### Clear Memory and Set Options
rm(list=ls())
options(warn=-1)
save <- TRUE  # Change to TRUE if you want to save the plot

# Set working directory (Modify if necessary)
script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(script_dir)

source("HW2_Library.R")
Read data
climatol_data <- read.table("data/climatol_ann.txt", header=FALSE)
names(climatol_data) <- c("Longitude", "Latitude", "Elevation", "AnnualPrecipitation")

X <- climatol_data[,1:3] 
names(X) <- c("Long", "Lat", "Elev")

prec_obs <- climatol_data$AnnualPrecipitation  # Extract precipitation column
precip <- prec_obs/10 #convert to cm

## Categorize precipitation to a binary variable
q = 150
prec_bin = rep(1, length(precip))

for (i in 1:length(precip)) {
  if (precip[i] < q) {
    prec_bin[i] = 0
  }
}
Plot observed vs. predicted precipitation
prec_pred = glmfit$fitted.values
par(mfrow=c(1,1))
lim=range(prec_bin,prec_pred)
plot(prec_bin,prec_pred,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)

Test model goodness using ANOVA
anova(glmfit)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: prec_obs
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                   245     204.82              
## Long  1   25.797       244     179.03 3.793e-07 ***
## Lat   1   14.659       243     164.37 0.0001289 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(ii) Estimate the function on the grid and plot the surface. Also plot the standard error
# Read the high-resolution elevation grid

  grid_data <- read.table("data/india-grid-topo.txt", header=FALSE)
  colnames(grid_data) <- c("Longitude", "Latitude", "Elevation")

# Read the Rajeevan grid defining India's area
rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE)
colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation")


# Perform an inner join to retain only points within India's defined region
india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation"))
covariate_names <- c("Longitude", "Latitude", "Elevation")
X_grid <- india_grid[, covariate_names, drop=FALSE]
colnames(X_grid) <- c("Long","Lat","Elev")



# Predict precipitation probabilities using the fitted model
india_grid$PredictedPrecip <- predict(glmfit, newdata = X_grid, type = "response")

# Estimate standard error of predictions
pred_se <- predict(glmfit, newdata = X_grid, se.fit = TRUE, type = "response")
india_grid$StandardError <- pred_se$se.fit

# Plot estimates and standard error on high-resolution grid


#### Define Color Palettes
  prec_palette <- hcl.colors(10, palette = "Zissou 1")  
  se_palette <- hcl.colors(10, palette = "Inferno")  

  #### Plot 1: Predicted Precipitation Map
  g1 <- ggplot(india_grid) +
    geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) +  # Use "Long" and "Lat"
    scale_fill_gradientn(name = "Precipitation", colours = prec_palette, na.value = "white") +
    theme_bw() +
    coord_fixed() +
    ggtitle("Spatial Distribution of Predicted Precipitation")

  #### Plot 2: Standard Error Map
  g2 <- ggplot(india_grid) +
    geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) +  # Use "Long" and "Lat"
    scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") +
    theme_bw() +
    coord_fixed() +
    ggtitle("Spatial Distribution of Prediction Standard Error")

  #### Save Plots
  if (save) {
    ggsave("6-Predicted_Precipitation_Map.pdf", g1, width=8, height=6)
    ggsave("6-Standard_Error_Map.pdf", g2, width=8, height=6)
  }

  #### Display Plots
  print(g1)

  print(g2)

(iii) Repeat (i) and (ii) with Local GLM

Plot observed vs. predicted precipitation
prec_pred = predict(locfit, Xnew, se.fit = T)
par(mfrow=c(1,1))
lim=range(prec_bin,prec_pred$fit)
plot(prec_bin,prec_pred$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)

Test model goodness using ANOVA
loc_Ftest(prec_bin,prec_pred, Xnew, locfit)
## [1] "F-test:"
## [1] "Reject the Null because F(local poly) = 4.07 > 1.46 = F(linear model)."
Estimate the function on the grid and plot the surface. Also plot the standard error
# Read the high-resolution elevation grid

  grid_data <- read.table("data/india-grid-topo.txt", header=FALSE)
  colnames(grid_data) <- c("Longitude", "Latitude", "Elevation")

# Read the Rajeevan grid defining India's area
rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE)
colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation")


# Perform an inner join to retain only points within India's defined region
india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation"))
covariate_names <- c("Longitude", "Latitude", "Elevation")
X_grid <- india_grid[, covariate_names, drop=FALSE]
colnames(X_grid) <- c("Long","Lat","Elev")



# Predict precipitation probabilities using the fitted model
india_grid$PredictedPrecip <- predict(locfit, newdata = X_grid, type = "response")

# Estimate standard error of predictions
pred_se <- predict(locfit, newdata = X_grid, se.fit = TRUE, type = "response")
india_grid$StandardError <- pred_se$se.fit

# Plot estimates and standard error on high-resolution grid


#### Define Color Palettes
  prec_palette <- hcl.colors(10, palette = "Zissou 1")  
  se_palette <- hcl.colors(10, palette = "Inferno")  

  #### Plot 1: Predicted Precipitation Map
  g1 <- ggplot(india_grid) +
    geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) +  # Use "Long" and "Lat"
    scale_fill_gradientn(name = "Precipitation", colours = prec_palette, na.value = "white") +
    theme_bw() +
    coord_fixed() +
    ggtitle("Spatial Distribution of Predicted Precipitation")

  #### Plot 2: Standard Error Map
  g2 <- ggplot(india_grid) +
    geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) +  # Use "Long" and "Lat"
    scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") +
    theme_bw() +
    coord_fixed() +
    ggtitle("Spatial Distribution of Prediction Standard Error")

  #### Save Plots
  if (save) {
    ggsave("2-Predicted_Precipitation_Map.pdf", g1, width=8, height=6)
    ggsave("2-Standard_Error_Map.pdf", g2, width=8, height=6)
  }

  #### Display Plots
  print(g1)

  print(g2)