#### Anna Starodubtseva
#### CVEN 6833, HW 01
#### Problem 8 - Additive Spatial Model

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)

# Other
save = FALSE

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 01/R"
setwd(mainDir)
suppressPackageStartupMessages(source("hw1_library.R"))
source("hw1_library.R")
library(fields)

## Read data
# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1])  # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)

## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip = prec_obs/10    # Convert precipitation to cm

xs = cbind(lon, lat)
xe = cbind(lon, lat, elev)

###################################################
############### (i) Fit variogram #################
###################################################
xe = cbind(lon, lat, elev)
# Compute and plot variogram
par(mar = c(4,4,1,1))
look <- vgram(xs, precip, N = 10, lon.lat = T)
bplot.xy(look$d, look$vgram, breaks = look$breaks, outline = F, xlab = "Distance (degrees)", ylab = "Variogram")
points(look$centers, look$stats[2,], col = "blue")

# Fit variogram using nonlinear least squares
xd <- look$centers
ym <- look$stats[2,]
sigma <- 0      # Nugget
nls.fit <- nls(ym ~ sigma^2 + rho*(1-exp(-xd/theta)), start = list(rho = max(look$stats[2,]), theta = quantile(xd,0.1)), control = list(maxiter = 50000, minFactor = 1e-16))
pars <- coefficients(nls.fit)
rho <- pars[1]
theta <- pars[2]
xr = round(max(xd)+sd(xd))
dgrid = seq(0, xr, length.out = 1000)
lines(dgrid, sigma^2 + rho*(1 - exp(-1*dgrid/theta)), col = "blue", lwd = "3")

# Check estimates against observed values
fit = spatialProcess(xs, precip, Z = elev)
prec_hat = predict(fit)*10

par(mfrow=c(1,1))
lim=range(prec_obs,prec_hat)
plot(prec_obs,prec_hat,xlab="Actual Precipitation (mm)",ylab="Estimated Precipitation (mm)",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)

######################################################
## (iv) Compute drop-one cross-validated estimates ###
### and scatterplot against observed values ##########
######################################################
N = length(precip)
loocv = vector(mode = "numeric", length = N)

for (i in 1:N) {
    mod_drop = spatialProcess(xs[-i,], precip[-i]*10, Z = elev[-i])
    x_drop = cbind(xs[i,1], xs[i,2])
    loocv[i] = predict(mod_drop, x = x_drop, Z = elev[i])
}

loocv = loocv       # Convert to mm
# Plot predicted values against observed
if (save==TRUE){
    pdf("LOOCV.pdf", width = 8, height = 6) # save figure
    lim = range(prec_obs, loocv)
    plot(prec_obs, loocv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV" )
    abline(a=0,b=1,col = "red")
    dev.off()
  }else{
    lim = range(prec_obs, loocv)
    plot(prec_obs, loocv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV" )
    abline(a=0,b=1,col = "red")
  }

#####################################################################################
##### (v) Drop 10% of observations, fit a best model, predict dropped points. #######
##### Compute RMSE and correlation and show them as boxplots ########################
#####################################################################################
# Initialize skill score vectors
i_full = 1:N
skill_rmse = vector(mode = "numeric", length = N)
skill_cor = vector(mode = "numeric", length = N)
drop = read.table("p8_drop10", header = F)
drop = as.matrix(drop)

for (i in 1:N) {
    i_drop = drop[i,]
    mod_drop = spatialProcess(xs[-i_drop,], precip[-i_drop]*10, Z = elev[-i_drop])
    x_drop = cbind(xs[i_drop,1], xs[i_drop,2])
    prec_pred = predict(mod_drop, x = x_drop, Z = xe[i_drop,3])
    
    prec_actual = precip[i_drop]
    skill_rmse[i] = sqrt(mean((prec_actual - prec_pred)^2))
    skill_cor[i] = cor(prec_actual, prec_pred)
}

# Plot skill of model based on Drop 10% method
if (save==TRUE){
  pdf("boxplots.pdf", width = 8, height = 6) # save figure
  par(mfrow=c(1,2))
  boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
  boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
  dev.off()
}else{
  par(mfrow=c(1,2))
  boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
  boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
}

######################################################################################
##### (vi) Spatially map the model estimates and standard error from the best model ##
########## on the high-resolution grid ###############################################
######################################################################################
elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat)

# Predict precipitation over high-resolution grid
ghat = predict(fit, x = xps, Z = elev_hr)
kse = predictSE(fit, x = xps, Z = elev_hr)

# Plot estimates and standard error on high-resolution grid
kse1 = data.matrix(kse)
ypred = data.frame(cbind(ghat*10,kse1*10))
names(ypred) = c("fit","se")
plot_surface(xps[,1],xps[,2],ypred,xs[,1],xs[,2],prec_obs,save)