#### Problem 9 - Homework 1 for CVEN6833 - Estimate the spatial surface of January precipitation using Kriging
#### Author: Alvaro Ossandon

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)

#data required: 
Month=1 # 1: Monthly precipitation of January; 2: Monthly precipitation of July
save=FALSE # True for saving figure as pdf file;

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW1"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1.R"))
source("Lib_HW1.R")
library(fields)
#### create a folder to save the figures
if (Month==1){
  diraux=paste(mainDir,"/Montly_Precep_January",sep = "")  
}else{
  diraux=paste(mainDir,"/Montly_Precep_July",sep = "")
}
setwd(diraux)
subDir="Kriging"
if (file.exists(subDir)){
  setwd(file.path(diraux, subDir))
} else {
  dir.create(file.path(diraux, subDir))
  setwd(file.path(diraux, subDir))
}

## Read data as table (data frame)
test = read.table(paste("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_monthly_precip_0",Month,".dat",sep=""))
#assign names for columns/variables:
names(test)=c("Lat","Long","Elev","Pm")
test1=test
#filtering the -999 values
non_values<- which(test[,4]<0)
test[non_values,4]=NaN
test=na.omit(test)
Pm = test[,4]     #Dependent Variable
X = test[,1:3]# Independent variable.
precip =Pm/10  #convert to cm


xs = cbind(X[,2],X[,1])

## Compute empirical variogram and plot ...
if (save==TRUE){
  pdf("Variogram.pdf", width = 8, height = 6) # save figure
}
par(mar=c(4,4,1,1))
look<- vgram(xs, precip, N=10, lon.lat=TRUE)
bplot.xy( look$d, look$vgram, breaks= look$breaks,
          outline=FALSE,
          xlab="distance (degrees)", ylab="variogram")
points( look$centers, look$stats[2,], col="blue")

# fit of exponential by nonlinear least squares
xd<- look$centers
ym<- look$stats[2,]
sigma<- 1 # 
nls.fit<- nls( ym ~ sigma^2 + rho*( 1- exp(-xd/theta)),
               start= list(rho=max(look$stats[2,]), theta=quantile(xd,0.1)), control=list( maxiter=50000,                                                                                           minFactor=1e-16) )
pars<- coefficients(nls.fit)
rho<-pars[1]
theta<-pars[2]
xr = round(max(xd)+sd(xd))
dgrid=seq(0,xr,length.out=400)
lines(dgrid, sigma^2 + rho*(1 - exp(-1*dgrid/theta)), col="blue", lwd=3)

if (save==TRUE){
  dev.off() 
}
### Predict at observation points.. is sigma = 0 then it will be exact.
sigma1=1
zz=Krig(xs,precip,rho=rho,theta=theta,m=1,sigma2=sigma1)
zz$theta=theta
Pmhat= predict.Krig(zz)


# Observed versus estimates
if (save==TRUE){
  pdf("Precipitation_Scatterplot.pdf", width = 8, height = 6) # save figure
}
par(mfrow=c(1,1))
lim=range(Pm,Pmhat*10)# values in mm
plot(Pm,Pmhat*10,xlab="Actual Precipitation",ylab="Estimated Precipitation",main="Actual vs Estimated Precipitation", xlim=lim, ylim=lim)
abline(a=0,b=1)

if (save==TRUE){
  dev.off() 
}
###################################################################
######## III. model diagnostics of kriging
###################################################################

### MODEL DIAGNOSTICS:
Pmest = Pmhat*10  # model's predicted values of Pm 
nvar = 2                   # number of variables 
mod_diagnostics(Pm, Pmest, nvar,save)

###############################################################################################
#### V. Drop 10% of obs, fit best model, predict dropped points. Compute RMSE and R2. Boxplot.
## validation nsample times (dropping a new 10% each time), and outputs boxplots of RMSE and R2.
###############################################################################################
mod_data = xs
N = length(precip)
drop = 10
nsample = 500
i_full = 1:N
# initialize skill score vectors
skill_rmse = vector(mode="numeric", length=nsample)
skill_cor = vector(mode="numeric", length=nsample)
for (i in 1:nsample){
  i_drop = sample(i_full,N*drop/100)            # can add argument replace=TRUE
  
  drop_dt = mod_data[-i_drop,] # drop 10% precip value
  # slightly different cases for different probl
  Pm1=precip[-i_drop]
  drop_mod=Krig(drop_dt,Pm1,rho=rho,theta=theta,m=1,sigma2=sigma1)
  drop_pred=predict.Krig(drop_mod,x=mod_data[i_drop,],drop.Z=TRUE)
  
  drop_actual =Pm[i_drop] #convert to mm 
  skill_rmse[i] = sqrt(mean((drop_actual - drop_pred*10)^2))
  skill_cor[i] = cor(drop_actual,drop_pred*10)
}
# Plot skill of model based on Drop 10% method
if(save==TRUE){
  pdf("boxplots.pdf", width = 8, height = 6) # save figure
  par(mfrow=c(1,2))
  boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
  boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
  dev.off()
}else{
  par(mfrow=c(1,2))
  boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
  boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
}

###################################################################
## VI. Spatially map the model estimates and the standard error ####
###################################################################
#  read the topography map
predpts = read.table("http://cires1.colorado.edu/~aslater/CVEN_6833/colo_dem.dat")
lat=predpts$V1
lon=predpts$V2
elev=predpts$V3
xps=cbind(lon,lat)

yp = predict.Krig(zz,x=xps,drop.Z=TRUE)
kse = predictSE.Krig(zz, x = xps)
kse1=data.matrix(kse)
ypred1=cbind(yp*10,kse1*10)
ypred=as.data.frame(ypred1)
names(ypred)=c("fit","se")
plot_surface(xps[,1],xps[,2],ypred,xs[,1],xs[,2],Pm,save)