library(sm)            # for sm.density in diagnostics
library(leaps)         # to provide predictor combinations
library(MPV)           # for calculating objective functions
library(akima)         # for interp to smooth for plotting
library(fields)        # for quilt.plot / surface plots
library(locfit)        # for fitting local polynomials
library(MASS)
library(VGAM)
library(nnet)
library(verification)
library(latex2exp)
library(maps)
# library(rgdal)       # for converting projection of coordinates
library(data.table)    # to use melt
library(ggplot2)       # to plot; map_data, scale_fill_gradientn, etc.
library(reshape2)
library(scales)        # for visualizing spatial maps
library(spBayes)
library(geoR)

# For trees
library(mclust)
library(tree)          # classification and regression trees
library(randomForest)  # random forests
library(RColorBrewer)  # color schemes for maps
library(Hmisc)         # minor tick marks
library(wesanderson)
library(cluster)
library(kohonen)
library(parallel)

#### GLM Fitting Function
GLM_fit = function(prec_obs, X, family) {
  if (family == "gamma") {
    links = c("log", "inverse", "identity")
    # Clean the data: the Gamma family needs strictly positive responses,
    # so replace zeros with a small random value
    prec_obs = ifelse(prec_obs <= 0, runif(1, 0.0001, 0.001), prec_obs)
  } else if (family == "binomial") {
    links = c("logit")
  } else if (family == "gaussian") {
    links = c("identity")
  }
  N = length(prec_obs)

  # Get up to 25 predictor combinations for each number of predictors
  combs = leaps(X, prec_obs, nbest = 25)
  combos = combs$which
  ncombos = length(combos[, 1])

  glm_val = vector(length = length(links))
  bestcombo = vector(length = length(links))
  obj_func = 1:ncombos
  xmse = 1:ncombos

  for (j in 1:length(links)) {
    for (i in 1:ncombos) {
      xx = X[, combos[i, ]]
      xx = as.data.frame(xx)
      names(xx) = names(X[combos[i, ]])
      if (family == "gamma") {
        zz = glm(prec_obs ~ ., data = xx, family = Gamma(link = links[j]))
      } else if (family == "binomial") {
        zz = glm(prec_obs ~ ., data = xx, family = binomial(link = links[j]))
      } else if (family == "gaussian") {
        zz = glm(prec_obs ~ ., data = xx, family = gaussian(link = links[j]))
      }
      obj_func[i] = AIC(zz)
      xmse[i] = sum(residuals(zz)^2) / (N - length(coef(zz)))
    }
    # Score each link by the minimum AIC over all predictor combinations
    glm_val[j] = min(obj_func)
    bestcombo[j] = which.min(obj_func)
  }

  press_df = data.frame(glm_val)
  rownames(press_df) = links
  print("Results of AIC for best-fit GLM")
  print(press_df)
  print(sprintf("Choosing the GLM which minimizes AIC: %s family and %s link function.",
                family, links[which.min(glm_val)]))

  # Refit the best model: the best link and its best predictor combination
  xx = X[, combos[bestcombo[which.min(glm_val)], ]]
  xx = data.frame(xx)
  names(xx) = names(X[combos[bestcombo[which.min(glm_val)], ]])
  if (family == "gamma") {
    bestmod = glm(prec_obs ~ ., data = xx,
                  family = Gamma(link = links[which.min(glm_val)]))
  } else if (family == "binomial") {
    bestmod = glm(prec_obs ~ ., data = xx,
                  family = binomial(link = links[which.min(glm_val)]))
  } else if (family == "gaussian") {
    bestmod = glm(prec_obs ~ ., data = xx,
                  family = gaussian(link = links[which.min(glm_val)]))
  } else {
    stop("Unsupported family!")
  }

  # Stash the selection results on the fitted object for later inspection
  bestmod$Call$LINK = links[which.min(glm_val)]
  bestmod$Call$AIC = AIC(bestmod)
  bestmod$Call$combo = combos[bestcombo[which.min(glm_val)], ]
  return(bestmod)
}
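# Example usage of GLM_fit -- a minimal sketch on synthetic data. All `demo_*`
# names are illustrative and not used elsewhere in this script; the gaussian
# family is chosen here because it exercises only the identity link.
set.seed(1)
demo_X = data.frame(elev = runif(60, 0, 3000),
                    lon  = runif(60, -109, -102),
                    lat  = runif(60, 37, 41))
demo_prec = 5 + 0.002 * demo_X$elev + rnorm(60)  # synthetic "precipitation"
demo_glm  = GLM_fit(demo_prec, demo_X, family = "gaussian")
demo_glm$Call$LINK   # selected link function
demo_glm$Call$combo  # selected predictor combination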
#### Quilt Plotting Function
Quilt_plotting = function(lon, lat, ypred, lon1, lat1, yob, type = "") {
  if (type == "binary") {
    quilt.plot(lon, lat, ypred$fit, xlab = "Longitude (m)", ylab = "Latitude (m)",
               main = 'Predicted Binary Precipitation on DEM Grid')
    US(add = TRUE, col = "black", lwd = 2)
    quilt.plot(lon1, lat1, yob, xlab = "Longitude (m)", ylab = "Latitude (m)",
               main = 'Actual Binary Precipitation')
    US(add = TRUE, col = "black", lwd = 2)
    quilt.plot(lon, lat, ypred$se.fit, xlab = "Longitude (m)", ylab = "Latitude (m)",
               main = 'Predicted Standard Error on DEM Grid')
    US(add = TRUE, col = "black", lwd = 2)
  } else {
    par(mfrow = c(2, 1))
    quilt.plot(lon, lat, ypred$fit, xlab = "Longitude (m)", ylab = "Latitude (m)",
               main = 'Predicted Precipitation on DEM Grid (mm)')
    US(add = TRUE, col = "black", lwd = 2)
    quilt.plot(lon1, lat1, yob, xlab = "Longitude (m)", ylab = "Latitude (m)",
               main = 'Actual Precipitation (mm)')
    US(add = TRUE, col = "black", lwd = 2)
    quilt.plot(lon, lat, ypred$se.fit, xlab = "Longitude (m)", ylab = "Latitude (m)",
               main = 'Predicted Standard Error on DEM Grid (mm)')
    US(add = TRUE, col = "black", lwd = 2)
  }
}

#### Drop-10% Cross-Validation Function
Drop_10_pred = function(X, bestTree, type) {
  mod_data = X
  N = nrow(X)
  drop = 10       # percent of observations withheld per sample
  nsample = 500   # number of cross-validation samples
  i_full = 1:N

  # initialize skill-score vectors
  skill_rmse = vector(mode = "numeric", length = nsample)
  skill_cor  = vector(mode = "numeric", length = nsample)

  for (i in 1:nsample) {
    i_drop  = sample(i_full, N * drop / 100)  # can add argument replace=TRUE
    drop_dt = mod_data[-i_drop, ]             # drop 10% of the precip values
    if (type == "tree") {
      myTree   = tree(prec ~ PC1 + PC2 + PC3 + PC4, data = drop_dt, model = TRUE)
      drop_mod = prune.tree(myTree, best = bestTree)
    } else {
      drop_mod = randomForest(prec ~ PC1 + PC2 + PC3 + PC4, data = drop_dt)
    }
    drop_pred   = predict(drop_mod, newdata = mod_data[i_drop, ])
    drop_actual = mod_data[i_drop, 1]
    skill_rmse[i] = sqrt(mean((drop_actual - drop_pred)^2))
    skill_cor[i]  = cor(drop_actual, drop_pred)
  }

  CV = as.data.frame(cbind(skill_rmse, skill_cor))
  names(CV) = c("rmse", "cor")
  return(CV)
}

#### Logit-to-Probability Function
logit2prob = function(logit) {
  odds <- exp(logit)
  prob <- odds / (1 + odds)
  return(prob)
}

#### Pruning Function
treeFun = function(myTree, toPlot = FALSE, title = "") {
  # Perform cross-validation on the tree object
  cvTree  <- cv.tree(myTree)
  optTree <- which.min(cvTree$dev)
  bestTree <- cvTree$size[optTree]
  # Guard against pruning to the root: fall back to the full tree size
  if (bestTree == 1) {
    bestTree = max(cvTree$size)
  }

  # Prune the tree based on the CV results
  pruneTree <- prune.tree(myTree, best = bestTree)

  # If plotting is selected
  if (toPlot) {
    # Plot the unpruned tree
    plot(myTree)
    text(myTree, cex = .75)
    title(main = paste("Unpruned Tree for", title))
    # Plot the CV deviance profile
    plot(cvTree$size, cvTree$dev, type = "b",
         main = paste("Cross Validation for", title))
    # Plot the pruned tree
    plot(pruneTree)
    text(pruneTree, cex = .75)
    title(main = paste("Pruned Tree for", title))
  }

  pruneTree$bestTree = bestTree
  return(pruneTree)
}

#### Extreme Clustering Function
pam_fmado_ll = function(x, k, ll) {
  # x  - design matrix; typically columns are stations, rows are time
  # k  - number of clusters
  # ll - two-column matrix of lat/long points (preferably projected) with N rows
  N = ncol(x)  # number of stations
  T = nrow(x)  # number of time points

  # Compute the F-madogram distance
  V = array(NaN, dim = c(T, N))
  for (p in 1:N) {
    x.vec = as.vector(x[, p])
    Femp = ecdf(x.vec)(x.vec)
    V[, p] = Femp
  }
  DD = dist(t(V), method = "manhattan", diag = TRUE, upper = TRUE) / (2 * T)

  # Weight by physical distance, rescaled to the range of the F-madogram distances
  DDll = dist(ll, method = 'manhattan')
  DDw = as.matrix(DD) +
    t(t(as.matrix(DDll)) / apply(as.matrix(DDll), 2, max)) * max(as.matrix(DD))

  # Do the clustering
  output = pam(DDw, k, diss = TRUE, medoids = NULL)
  return(output)
}
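# Example usage of pam_fmado_ll -- a minimal sketch on synthetic data. The
# `demo_*` names are illustrative: 12 fake "stations" with 100 time steps each,
# clustered into 3 groups.
set.seed(7)
demo_x  = matrix(rgamma(100 * 12, shape = 2), nrow = 100, ncol = 12)  # time x stations
demo_ll = cbind(lon = runif(12), lat = runif(12))
demo_pam = pam_fmado_ll(demo_x, k = 3, ll = demo_ll)
table(demo_pam$clustering)  # cluster sizes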
#### multiplot Function
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine the layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: number of columns of plots
    # nrow: number of rows needed, calculated from the number of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots / cols)),
                     ncol = cols, nrow = ceiling(numPlots / cols))
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the region that contains this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
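# Example usage of multiplot -- a minimal sketch; the two ggplot objects built
# from the built-in mtcars data are illustrative.
demo_p1 = ggplot(mtcars, aes(wt, mpg)) + geom_point()
demo_p2 = ggplot(mtcars, aes(factor(cyl))) + geom_bar()
multiplot(demo_p1, demo_p2, cols = 2)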
#### Local Polynomial Fitting
# Search for the best alpha over a grid of alpha values between 0 and 1
locpoly_fit = function(prec_obs, X, family = "", link = "", glm = FALSE, plot = FALSE) {
  nvar = ncol(X)  # number of variables
  N = nrow(X)     # number of data points

  if (glm == TRUE) {
    if (family == "gamma") {
      # The Gamma family needs strictly positive responses
      prec_obs = ifelse(prec_obs <= 0, runif(1, 0.0001, 0.001), prec_obs)
    }
    minalpha = 0.6
    alpha_grid  = seq(minalpha, 1.0, by = 0.05)  # candidate alphas for degree 1
    n = length(alpha_grid)
    alpha1_grid = seq(minalpha, 1.0, by = 0.05)  # candidate alphas for degree 2
    alpha2_grid = c(alpha_grid, alpha1_grid)

    # Get the GCV values for all the alpha values for polynomial orders 1 and 2.
    # kern="bisq" uses the bisquare kernel to compute the weights of the
    # neighbors, which are then used in the weighted least squares.
    gcv_deg1 = gcvplot(prec_obs ~ ., data = X, maxk = 100000, alpha = alpha_grid,
                       deg = 1, kern = "bisq", ev = dat(), scale = TRUE,
                       family = family, link = link)
    gcv_deg2 = gcvplot(prec_obs ~ ., data = X, maxk = 100000, alpha = alpha1_grid,
                       deg = 2, kern = "bisq", ev = dat(), scale = TRUE,
                       family = family, link = link)

    # Pick the alpha and polynomial degree that give the least GCV
    z2 = order(c(gcv_deg1$values, gcv_deg2$values))
    bestdeg = 1
    if (z2[1] > n) bestdeg = 2
    best_alpha = alpha2_grid[z2[1]]
    best_gcv = c(gcv_deg1$values, gcv_deg2$values)[z2[1]]

    # Now fit the LOCFIT model using the best alpha and degree obtained above.
    # ev=dat() evaluates the fit only at the data points, which is cheaper but
    # prevents plotting over a grid.
    if (plot == FALSE) {
      bestmod = locfit(prec_obs ~ ., data = X, alpha = best_alpha, maxk = 10000,
                       deg = bestdeg, kern = "bisq", scale = TRUE,
                       family = family, link = link, ev = dat())
    } else {
      bestmod = locfit(prec_obs ~ ., data = X, alpha = best_alpha, maxk = 10000,
                       deg = bestdeg, kern = "bisq", scale = TRUE,
                       family = family, link = link)
    }
    # Stash the chosen parameters on the fitted object
    bestmod$call$alpha  = best_alpha
    bestmod$call$deg    = bestdeg
    bestmod$call$family = family
    bestmod$call$link   = link
    bestmod$call$gcv    = best_gcv
  } else {
    porder = 1
    minalpha = 2 * (nvar * porder + 1) / N  # smallest alpha with enough neighbors for degree 1
    alpha_grid = seq(minalpha, 1.0, by = 0.05)
    n = length(alpha_grid)
    porder = 2
    minalpha = 2 * (nvar * porder + 1) / N  # smallest alpha with enough neighbors for degree 2
    alpha1_grid = seq(minalpha, 1.0, by = 0.05)
    alpha2_grid = c(alpha_grid, alpha1_grid)

    # Get the GCV values for all the alpha values for polynomial orders 1 and 2,
    # again using the bisquare kernel for the neighbor weights.
    gcv_deg1 = gcvplot(prec_obs ~ ., data = X, maxk = 100000, alpha = alpha_grid,
                       deg = 1, kern = "bisq", ev = dat(), scale = TRUE)
    gcv_deg2 = gcvplot(prec_obs ~ ., data = X, maxk = 100000, alpha = alpha1_grid,
                       deg = 2, kern = "bisq", ev = dat(), scale = TRUE)

    # Pick the alpha and polynomial degree that give the least GCV
    z2 = order(c(gcv_deg1$values, gcv_deg2$values))
    bestdeg = 1
    if (z2[1] > n) bestdeg = 2
    best_alpha = alpha2_grid[z2[1]]
    best_gcv = c(gcv_deg1$values, gcv_deg2$values)[z2[1]]

    # Now fit the LOCFIT model using the best alpha and degree obtained above
    if (plot == FALSE) {
      bestmod = locfit(prec_obs ~ ., data = X, alpha = best_alpha, maxk = 10000,
                       deg = bestdeg, kern = "bisq", ev = dat())
    } else {
      bestmod = locfit(prec_obs ~ ., data = X, alpha = best_alpha, maxk = 10000,
                       deg = bestdeg, kern = "bisq")
    }
    # Stash the chosen parameters on the fitted object
    bestmod$call$alpha = best_alpha
    bestmod$call$deg   = bestdeg
    bestmod$call$gcv   = best_gcv
  }
  return(bestmod)
}

#### F-test for Local Polynomial Fitting
loc_Ftest = function(prec_obs, prec_hat, X, bestmod) {
  N = length(prec_obs)  # number of data points
  RSS1 = sum((prec_obs - prec_hat$fit)^2)
  nu1 = bestmod$dp[6]   # trace(L)     [lk]
  nu2 = bestmod$dp[7]   # trace(L^T L) [df1]
  nu11 = N - 2 * nu1 + nu2

  # Linear regression (the null model)
  X = as.matrix(X)
  zzLin = lm(prec_obs ~ X)
  XX = cbind(rep(1, N), X)
  # Compute the hat matrix
  hatm = XX %*% solve(t(XX) %*% XX) %*% t(XX)
  II = diag(N)
  delta0 = t(II - hatm) %*% (II - hatm)  # Equation 9.2
  nu00 = sum(diag(delta0))
  RSS0 = sum(residuals(zzLin)^2)

  Fdata = (RSS0 - RSS1) / (nu00 - nu11)
  Fdata = Fdata / (RSS1 / nu11)
  Ftheor = qf(0.95, (nu00 - nu11), nu11)  # 95% confidence level

  # Fdata > Ftheor: reject the null, i.e., the data favor the local polynomial
  if (Fdata > Ftheor) {
    print("F-test:")
    print(sprintf("Reject the null: F statistic = %0.2f > %0.2f = critical value, so the local polynomial beats the linear model.",
                  Fdata, Ftheor))
  } else {
    print("F-test:")
    print(sprintf("Fail to reject the null: F statistic = %0.2f <= %0.2f = critical value, so the linear model suffices.",
                  Fdata, Ftheor))
  }
}
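# Example usage of locpoly_fit and loc_Ftest -- a minimal sketch on synthetic
# data with a nonlinear signal, so the F-test should favor the local
# polynomial. The `demo_*` names are illustrative; small alpha values in the
# search grid may generate locfit warnings on data this small.
set.seed(11)
demo_X = data.frame(x1 = runif(200), x2 = runif(200))
demo_y = sin(2 * pi * demo_X$x1) + demo_X$x2 + rnorm(200, sd = 0.2)
demo_loc = locpoly_fit(demo_y, demo_X)
demo_hat = predict(demo_loc, newdata = demo_X, se.fit = TRUE)
loc_Ftest(demo_y, demo_hat, demo_X, demo_loc)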