#### Anna Starodubtseva
#### CVEN 6833, HW 01
#### Problem 8 - Additive Spatial Model
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
# Other
save = FALSE
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 01/R"
setwd(mainDir)
suppressPackageStartupMessages(source("hw1_library.R"))
source("hw1_library.R")
library(fields)
## Read data
# Load winter precipitation data
data = read.table(paste(mainDir, "/data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
# Omit extreme values
non_values <- which(data[,-1] > 1000, arr.ind = T)
data[non_values] = NaN
data = na.omit(data)
prec_obs = colMeans(data[,-1]) # Mean of location columns
# Load location data
loc = read.table(paste(mainDir, "/data/Precipitaton_data.txt", sep = ""), header = T)
## Create design matrix
X = loc[,1:3]
names(X) = c("Long","Lat","Elev")
lon = X[,1]
lat = X[,2]
elev = X[,3]
precip = prec_obs/10 # Convert precipitation to cm
xs = cbind(lon, lat)
xe = cbind(lon, lat, elev)
###################################################
############### (i) Fit variogram #################
###################################################
xe = cbind(lon, lat, elev)
# Compute and plot variogram
par(mar = c(4,4,1,1))
look <- vgram(xs, precip, N = 10, lon.lat = T)
bplot.xy(look$d, look$vgram, breaks = look$breaks, outline = F, xlab = "Distance (degrees)", ylab = "Variogram")
points(look$centers, look$stats[2,], col = "blue")
# Fit variogram using nonlinear least squares
xd <- look$centers
ym <- look$stats[2,]
sigma <- 0 # Nugget
nls.fit <- nls(ym ~ sigma^2 + rho*(1-exp(-xd/theta)), start = list(rho = max(look$stats[2,]), theta = quantile(xd,0.1)), control = list(maxiter = 50000, minFactor = 1e-16))
pars <- coefficients(nls.fit)
rho <- pars[1]
theta <- pars[2]
xr = round(max(xd)+sd(xd))
dgrid = seq(0, xr, length.out = 1000)
lines(dgrid, sigma^2 + rho*(1 - exp(-1*dgrid/theta)), col = "blue", lwd = "3")

# Check estimates against observed values
fit = spatialProcess(xs, precip, Z = elev)
prec_hat = predict(fit)*10
par(mfrow=c(1,1))
lim=range(prec_obs,prec_hat)
plot(prec_obs,prec_hat,xlab="Actual Precipitation (mm)",ylab="Estimated Precipitation (mm)",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)

######################################################
## (iv) Compute drop-one cross-validated estimates ###
### and scatterplot against observed values ##########
######################################################
N = length(precip)
loocv = vector(mode = "numeric", length = N)
for (i in 1:N) {
mod_drop = spatialProcess(xs[-i,], precip[-i]*10, Z = elev[-i])
x_drop = cbind(xs[i,1], xs[i,2])
loocv[i] = predict(mod_drop, x = x_drop, Z = elev[i])
}
loocv = loocv # Convert to mm
# Plot predicted values against observed
if (save==TRUE){
pdf("LOOCV.pdf", width = 8, height = 6) # save figure
lim = range(prec_obs, loocv)
plot(prec_obs, loocv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV" )
abline(a=0,b=1,col = "red")
dev.off()
}else{
lim = range(prec_obs, loocv)
plot(prec_obs, loocv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV" )
abline(a=0,b=1,col = "red")
}

#####################################################################################
##### (v) Drop 10% of observations, fit a best model, predict dropped points. #######
##### Compute RMSE and correlation and show them as boxplots ########################
#####################################################################################
# Initialize skill score vectors
i_full = 1:N
skill_rmse = vector(mode = "numeric", length = N)
skill_cor = vector(mode = "numeric", length = N)
drop = read.table("p8_drop10", header = F)
drop = as.matrix(drop)
for (i in 1:N) {
i_drop = drop[i,]
mod_drop = spatialProcess(xs[-i_drop,], precip[-i_drop]*10, Z = elev[-i_drop])
x_drop = cbind(xs[i_drop,1], xs[i_drop,2])
prec_pred = predict(mod_drop, x = x_drop, Z = xe[i_drop,3])
prec_actual = precip[i_drop]
skill_rmse[i] = sqrt(mean((prec_actual - prec_pred)^2))
skill_cor[i] = cor(prec_actual, prec_pred)
}
# Plot skill of model based on Drop 10% method
if (save==TRUE){
pdf("boxplots.pdf", width = 8, height = 6) # save figure
par(mfrow=c(1,2))
boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
dev.off()
}else{
par(mfrow=c(1,2))
boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
}

######################################################################################
##### (vi) Spatially map the model estimates and standard error from the best model ##
########## on the high-resolution grid ###############################################
######################################################################################
elev_grid=read.delim(paste(mainDir, "/data/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
elev_hr = elev_grid[,3]
xps = cbind(lon,lat)
# Predict precipitation over high-resolution grid
ghat = predict(fit, x = xps, Z = elev_hr)
kse = predictSE(fit, x = xps, Z = elev_hr)
# Plot estimates and standard error on high-resolution grid
kse1 = data.matrix(kse)
ypred = data.frame(cbind(ghat*10,kse1*10))
names(ypred) = c("fit","se")
plot_surface(xps[,1],xps[,2],ypred,xs[,1],xs[,2],prec_obs,save)

