--- title: "HW2_Prob2" author: "Haiying Peng" date: "2025-03-12" output: html_document --- #### 2.Use a precipitation threshold of 150 cm across all the stations and categorize the precipitation at each station to a binary variable – 0 if it is less than the threshold, 1, otherwise. Use the suite of predictors from HW1. i. Fit a ‘best’ GLM (i.e. logistic regression) with the appropriate link function using one of the objective functions. Test the model goodness using ANOVA ii. Estimate the function on the grid and plot the surface. Also plot the standard error. iii. Repeat (i) and (ii) with Local GLM ```{r, message=FALSE, warning=FALSE} #### Clear Memory and Set Options rm(list=ls()) options(warn=-1) save <- TRUE # Change to TRUE if you want to save the plot # Set working directory (Modify if necessary) script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) setwd(script_dir) source("HW2_Library.R") ``` ##### Read data ```{r, echo=TRUE, results='asis'} climatol_data <- read.table("data/climatol_ann.txt", header=FALSE) names(climatol_data) <- c("Longitude", "Latitude", "Elevation", "AnnualPrecipitation") X <- climatol_data[,1:3] names(X) <- c("Long", "Lat", "Elev") prec_obs <- climatol_data$AnnualPrecipitation # Extract precipitation column precip <- prec_obs/10 #convert to cm ## Categorize precipitation to a binary variable q = 150 prec_bin = rep(1, length(precip)) for (i in 1:length(precip)) { if (precip[i] < q) { prec_bin[i] = 0 } } ``` ##### (i) Fit a ‘best’ GLM (i.e. logistic regression) with the appropriate link function using one of the objective functions. Test the model goodness using ANOVA Fit GLM ```{r, message=FALSE, warning=FALSE, echo=TRUE} glmfit = GLM_fit(prec_bin, X, family = "binomial") print(summary(glmfit)) ``` ##### Plot observed vs. predicted precipitation ```{r} prec_pred = glmfit$fitted.values par(mfrow=c(1,1)) lim=range(prec_bin,prec_pred) plot(prec_bin,prec_pred,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim) ``` ##### Test model goodness using ANOVA ```{r} anova(glmfit) ``` ##### (ii) Estimate the function on the grid and plot the surface. Also plot the standard error ```{r, echo=TRUE, results='show'} # Read the high-resolution elevation grid grid_data <- read.table("data/india-grid-topo.txt", header=FALSE) colnames(grid_data) <- c("Longitude", "Latitude", "Elevation") # Read the Rajeevan grid defining India's area rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE) colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation") # Perform an inner join to retain only points within India's defined region india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation")) covariate_names <- c("Longitude", "Latitude", "Elevation") X_grid <- india_grid[, covariate_names, drop=FALSE] colnames(X_grid) <- c("Long","Lat","Elev") # Predict precipitation probabilities using the fitted model india_grid$PredictedPrecip <- predict(glmfit, newdata = X_grid, type = "response") # Estimate standard error of predictions pred_se <- predict(glmfit, newdata = X_grid, se.fit = TRUE, type = "response") india_grid$StandardError <- pred_se$se.fit # Plot estimates and standard error on high-resolution grid #### Define Color Palettes prec_palette <- hcl.colors(10, palette = "Zissou 1") se_palette <- hcl.colors(10, palette = "Inferno") #### Plot 1: Predicted Precipitation Map g1 <- ggplot(india_grid) + geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) + # Use "Long" and "Lat" scale_fill_gradientn(name = "Precipitation", colours = prec_palette, na.value = "white") + theme_bw() + coord_fixed() + ggtitle("Spatial Distribution of Predicted Precipitation") #### Plot 2: Standard Error Map g2 <- ggplot(india_grid) + geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) + # Use "Long" and "Lat" scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") + theme_bw() + coord_fixed() + ggtitle("Spatial Distribution of Prediction Standard Error") #### Save Plots if (save) { ggsave("6-Predicted_Precipitation_Map.pdf", g1, width=8, height=6) ggsave("6-Standard_Error_Map.pdf", g2, width=8, height=6) } #### Display Plots print(g1) print(g2) ``` #### (iii) Repeat (i) and (ii) with Local GLM ##### Fit a local polynomial using the appropriate link function (i.e. Local GLM) ```{r, warning=FALSE} links = c("logit") N = length(prec_bin) combs = leaps(X, prec_bin, nbest=25) # Get up to 25 combinations for each # number of predictors combos = combs$which ncombos = length(combos[,1]) gcvs_m=1:ncombos glm_gcv=vector(length = length(links)) bestcombo=vector(length = length(links)) for(j in 1:length(links)) { sprintf("j",j) for(i in 1:ncombos) { xx = X[,combos[i,]] xx=as.data.frame(xx) zz=locpoly_fit(prec_bin, xx,family="binomial", link=links[j], glm=TRUE,plot=TRUE) gcvs_m[i] = zz$call$gcv } # Test using GCV objective function glm_gcv[j]=min(gcvs_m) bestcombo[j]=which.min(gcvs_m) } X_best = data.frame(X[,combos[bestcombo[which.min(glm_gcv)],]]) names(X_best) = names(X[combos[bestcombo[which.min(glm_gcv)],]]) locfit = locpoly_fit(prec_bin, X_best,family="binomial", link=links[which.min(glm_gcv)], glm=TRUE,plot=TRUE) locfit$combo=combos[bestcombo[which.min(glm_gcv)],] Xnew=X[,locfit$combo] summary(locfit) ``` ##### Plot observed vs. predicted precipitation ```{r} prec_pred = predict(locfit, Xnew, se.fit = T) par(mfrow=c(1,1)) lim=range(prec_bin,prec_pred$fit) plot(prec_bin,prec_pred$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim) ``` ##### Test model goodness using ANOVA ```{r} loc_Ftest(prec_bin,prec_pred, Xnew, locfit) ``` ##### Estimate the function on the grid and plot the surface. Also plot the standard error ```{r, echo=TRUE, results='show'} # Read the high-resolution elevation grid grid_data <- read.table("data/india-grid-topo.txt", header=FALSE) colnames(grid_data) <- c("Longitude", "Latitude", "Elevation") # Read the Rajeevan grid defining India's area rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE) colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation") # Perform an inner join to retain only points within India's defined region india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation")) covariate_names <- c("Longitude", "Latitude", "Elevation") X_grid <- india_grid[, covariate_names, drop=FALSE] colnames(X_grid) <- c("Long","Lat","Elev") # Predict precipitation probabilities using the fitted model india_grid$PredictedPrecip <- predict(locfit, newdata = X_grid, type = "response") # Estimate standard error of predictions pred_se <- predict(locfit, newdata = X_grid, se.fit = TRUE, type = "response") india_grid$StandardError <- pred_se$se.fit # Plot estimates and standard error on high-resolution grid #### Define Color Palettes prec_palette <- hcl.colors(10, palette = "Zissou 1") se_palette <- hcl.colors(10, palette = "Inferno") #### Plot 1: Predicted Precipitation Map g1 <- ggplot(india_grid) + geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) + # Use "Long" and "Lat" scale_fill_gradientn(name = "Precipitation", colours = prec_palette, na.value = "white") + theme_bw() + coord_fixed() + ggtitle("Spatial Distribution of Predicted Precipitation") #### Plot 2: Standard Error Map g2 <- ggplot(india_grid) + geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) + # Use "Long" and "Lat" scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") + theme_bw() + coord_fixed() + ggtitle("Spatial Distribution of Prediction Standard Error") #### Save Plots if (save) { ggsave("2-Predicted_Precipitation_Map.pdf", g1, width=8, height=6) ggsave("2-Standard_Error_Map.pdf", g2, width=8, height=6) } #### Display Plots print(g1) print(g2) ```