#### Clear Memory and Set Options
rm(list=ls())
options(warn=-1)
save <- TRUE # Change to TRUE if you want to save the plot
# Set working directory (Modify if necessary)
script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(script_dir)
source("HW2_Library.R")
climatol_data <- read.table("data/climatol_ann.txt", header=FALSE)
names(climatol_data) <- c("Longitude", "Latitude", "Elevation", "AnnualPrecipitation")
X <- climatol_data[,1:3]
names(X) <- c("Long", "Lat", "Elev")
prec_obs <- climatol_data$AnnualPrecipitation # Extract precipitation column
precip <- prec_obs/10 #convert to cm
## Categorize precipitation to a binary variable
q = 150
prec_bin = rep(1, length(precip))
for (i in 1:length(precip)) {
if (precip[i] < q) {
prec_bin[i] = 0
}
}
Fit GLM
glmfit = GLM_fit(prec_bin, X, family = "binomial")
## [1] "Results of AIC for bestfit GLM"
## glm_val
## logit 170.369
print(summary(glmfit))
##
## Call:
## glm(formula = prec_obs ~ ., family = binomial(link = links[which.min(glm_val)]),
## data = xx)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -17.37216 3.22415 -5.388 7.12e-08 ***
## Long 0.23332 0.04455 5.238 1.63e-07 ***
## Lat -0.15704 0.04352 -3.609 0.000308 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 204.82 on 245 degrees of freedom
## Residual deviance: 164.37 on 243 degrees of freedom
## AIC: 170.37
##
## Number of Fisher Scoring iterations: 6
prec_pred = glmfit$fitted.values
par(mfrow=c(1,1))
lim=range(prec_bin,prec_pred)
plot(prec_bin,prec_pred,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
anova(glmfit)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: prec_obs
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 245 204.82
## Long 1 25.797 244 179.03 3.793e-07 ***
## Lat 1 14.659 243 164.37 0.0001289 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Read the high-resolution elevation grid
grid_data <- read.table("data/india-grid-topo.txt", header=FALSE)
colnames(grid_data) <- c("Longitude", "Latitude", "Elevation")
# Read the Rajeevan grid defining India's area
rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE)
colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation")
# Perform an inner join to retain only points within India's defined region
india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation"))
covariate_names <- c("Longitude", "Latitude", "Elevation")
X_grid <- india_grid[, covariate_names, drop=FALSE]
colnames(X_grid) <- c("Long","Lat","Elev")
# Predict precipitation probabilities using the fitted model
india_grid$PredictedPrecip <- predict(glmfit, newdata = X_grid, type = "response")
# Estimate standard error of predictions
pred_se <- predict(glmfit, newdata = X_grid, se.fit = TRUE, type = "response")
india_grid$StandardError <- pred_se$se.fit
# Plot estimates and standard error on high-resolution grid
#### Define Color Palettes
prec_palette <- hcl.colors(10, palette = "Zissou 1")
se_palette <- hcl.colors(10, palette = "Inferno")
#### Plot 1: Predicted Precipitation Map
g1 <- ggplot(india_grid) +
geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) + # Use "Long" and "Lat"
scale_fill_gradientn(name = "Precipitation", colours = prec_palette, na.value = "white") +
theme_bw() +
coord_fixed() +
ggtitle("Spatial Distribution of Predicted Precipitation")
#### Plot 2: Standard Error Map
g2 <- ggplot(india_grid) +
geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) + # Use "Long" and "Lat"
scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") +
theme_bw() +
coord_fixed() +
ggtitle("Spatial Distribution of Prediction Standard Error")
#### Save Plots
if (save) {
ggsave("6-Predicted_Precipitation_Map.pdf", g1, width=8, height=6)
ggsave("6-Standard_Error_Map.pdf", g2, width=8, height=6)
}
#### Display Plots
print(g1)
print(g2)
links = c("logit")
N = length(prec_bin)
combs = leaps(X, prec_bin, nbest=25) # Get up to 25 combinations for each
# number of predictors
combos = combs$which
ncombos = length(combos[,1])
gcvs_m=1:ncombos
glm_gcv=vector(length = length(links))
bestcombo=vector(length = length(links))
for(j in 1:length(links)) { sprintf("j",j)
for(i in 1:ncombos) {
xx = X[,combos[i,]]
xx=as.data.frame(xx)
zz=locpoly_fit(prec_bin, xx,family="binomial", link=links[j], glm=TRUE,plot=TRUE)
gcvs_m[i] = zz$call$gcv
}
# Test using GCV objective function
glm_gcv[j]=min(gcvs_m)
bestcombo[j]=which.min(gcvs_m)
}
X_best = data.frame(X[,combos[bestcombo[which.min(glm_gcv)],]])
names(X_best) = names(X[combos[bestcombo[which.min(glm_gcv)],]])
locfit = locpoly_fit(prec_bin, X_best,family="binomial", link=links[which.min(glm_gcv)], glm=TRUE,plot=TRUE)
locfit$combo=combos[bestcombo[which.min(glm_gcv)],]
Xnew=X[,locfit$combo]
summary(locfit)
## Estimation type: Local Likelihood - Binomial
##
## Call:
## locfit(formula = prec_obs ~ ., data = X, alpha = 0.75, maxk = 10000,
## deg = 2, kern = "bisq", scale = T, family = "binomial", link = "logit",
## gcv = 0.190198530525885)
##
## Number of data points: 246
## Independent variables: Long Lat Elev
## Evaluation structure: Rectangular Tree
## Number of evaluation points: 99
## Degree of fit: 2
## Fitted Degrees of Freedom: 28.626
prec_pred = predict(locfit, Xnew, se.fit = T)
par(mfrow=c(1,1))
lim=range(prec_bin,prec_pred$fit)
plot(prec_bin,prec_pred$fit,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
loc_Ftest(prec_bin,prec_pred, Xnew, locfit)
## [1] "F-test:"
## [1] "Reject the Null because F(local poly) = 4.07 > 1.46 = F(linear model)."
# Read the high-resolution elevation grid
grid_data <- read.table("data/india-grid-topo.txt", header=FALSE)
colnames(grid_data) <- c("Longitude", "Latitude", "Elevation")
# Read the Rajeevan grid defining India's area
rajeevan_grid = read.table("data/2b-Rajeevan_grid_with_distances.txt", header=TRUE)
colnames(rajeevan_grid)[1:3] <- c("Longitude", "Latitude", "Elevation")
# Perform an inner join to retain only points within India's defined region
india_grid = merge(rajeevan_grid, grid_data, by = c("Longitude", "Latitude", "Elevation"))
covariate_names <- c("Longitude", "Latitude", "Elevation")
X_grid <- india_grid[, covariate_names, drop=FALSE]
colnames(X_grid) <- c("Long","Lat","Elev")
# Predict precipitation probabilities using the fitted model
india_grid$PredictedPrecip <- predict(locfit, newdata = X_grid, type = "response")
# Estimate standard error of predictions
pred_se <- predict(locfit, newdata = X_grid, se.fit = TRUE, type = "response")
india_grid$StandardError <- pred_se$se.fit
# Plot estimates and standard error on high-resolution grid
#### Define Color Palettes
prec_palette <- hcl.colors(10, palette = "Zissou 1")
se_palette <- hcl.colors(10, palette = "Inferno")
#### Plot 1: Predicted Precipitation Map
g1 <- ggplot(india_grid) +
geom_tile(aes(x = Longitude, y = Latitude, fill = PredictedPrecip)) + # Use "Long" and "Lat"
scale_fill_gradientn(name = "Precipitation", colours = prec_palette, na.value = "white") +
theme_bw() +
coord_fixed() +
ggtitle("Spatial Distribution of Predicted Precipitation")
#### Plot 2: Standard Error Map
g2 <- ggplot(india_grid) +
geom_tile(aes(x = Longitude, y = Latitude, fill = StandardError)) + # Use "Long" and "Lat"
scale_fill_gradientn(name = "Std. Error", colours = se_palette, na.value = "white") +
theme_bw() +
coord_fixed() +
ggtitle("Spatial Distribution of Prediction Standard Error")
#### Save Plots
if (save) {
ggsave("2-Predicted_Precipitation_Map.pdf", g1, width=8, height=6)
ggsave("2-Standard_Error_Map.pdf", g2, width=8, height=6)
}
#### Display Plots
print(g1)
print(g2)