1. K-NN Conditional Ensemble Forecast/Simulation
Use K-NN bootstrapping approach to simulate/forecast Spring streamflow on the Colorado River at Lees Ferry based on suite of predictors, also called, ‘feature vectors’, at two lead times - Mar 1 and Apr 1. The feature vector includes - EPS ensemble mean forecast, winter climate indices (AMO and PDO) and SWE. You can experiment with all or subset of these variables.
The steps for each lead time are:

  1. For each year, \(t\), using the feature vector in that year, \(x_t\), obtain K-nearest neighbors from the historical feature vectors (of course, excluding the year, \(t\))
  2. Select one of the K neighbors using a weight metric. The selected neighbor corresponds to a historical year and with it the associated spring streamflow.
  3. Repeast steps (ii) say 100 times, to obtain ensemble simulation of streamflow for each year \(t\). Compute the mean or median of the ensemble to get a single value.
  4. Repeat steps (i) - (iii) for all the years and similarly for the two lead times.
  5. Plot the historic flow vs ensemble mean forecast along with the 1:1 line for visual comparison. Compute skill scores for each lead time - correlation between the historic flow and the mean of the ensemble forecast, also compute RPSS. Comment on what you find.
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 03/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw3_library.R"))
source("hw3_library.R")
set.seed(10)

Read data

swe = as.matrix(readRDS("./data/prob1/springSWE_inches.Rds"))   # Snow water equivalent @ Mar 1 & Apr 1
clim = as.matrix(readRDS("./data/prob1/winterMeanClimateIndices.Rds")) # AMO, PDO, ENSO
data = as.matrix(readRDS("./data/prob1/springFlows.Rds"))    # Spring flow @ Lees Ferry
years = swe[,1]
clim = clim[-which(clim[,1] < years[1]),]     # only include years with full dataset
data = data[-which(data[,1] < years[1]),]     # only include years with full dataset
Compute terciles of observed historical spring flow
obs = as.data.frame(data)
q_maf = obs$q_maf
terc = quantile(q_maf, c(1/3, 2/3))
terc_obs = rep(NA, length(q_maf))
for (i in 1:length(q_maf)){
  if ( q_maf[i] < terc[1]){
    terc_obs[i]=1
  } else if ( q_maf[i] >= terc[1] & q_maf[i] < terc[2] ) {
    terc_obs[i]=2
  } else {
    terc_obs[i]=3
  }
}
climatology=c(1/3, 1/3, 1/3)

Use K-NN bootstrapping to simulate/forecast Spring stream flow based on suite of predictors at Mar 1 and Apr 1 lead times

nyrs = length(years)
nsim = 250      # number of simulations
# create matrix to store forecast data
ens_stat = as.data.frame(matrix(NA,nrow=nyrs,ncol=2))
names(ens_stat) = c("Mean","Median")
terc_forecast = matrix(NA,nrow=nyrs,ncol=3)

for (j in 2:3){
  par(mfrow = c(1,2), oma = c(2,0,3,1), mar = c(4,2,2,1))
  df = cbind(data,clim[,-1],swe[,j])   # construct record
  for (i in 1:nyrs){
    df.train = as.data.frame(subset(df, years != years[i])) # training dataset
    preds = as.data.frame(subset(df, years == years[i]))   # year to be predicted
    
    ### (i) For each year, t, obtain K-nearest neighbors from the historical feature vectors
    neighbs = knn(df.train, preds)
    
    ### (ii) Select K neighbors using a weight metric
    ### (iii) Obtain ensemble simulation of streamflow
    sims = neighbor_bootstrapping(neighbs, nsim)
    
    ### Compute terciles of predicted historical spring flow
    p1 = sum(sims < terc[1])/length(sims)
    p3 = sum(sims > terc[2])/length(sims)
    p2 = 1 - (p3 + p1)
    p=c(p1, p2, p3)
    
    terc_forecast[i,] = p
    
    ### Compute mean and median of ensemble
    ens_stat[i,1] = mean(sims)
    ens_stat[i,2] = median(sims)
  }
  ### (v) Plot the historic flow vs ensemble mean and median forecast and compute skill scores
  rmse = sqrt(mean((ens_stat$Mean-obs$q_maf)^2))
  rpss = rps(obs=terc_obs, pred=terc_forecast, baseline=climatology)$rpss
  corr = cor(obs$q_maf, ens_stat$Mean)

  plot(ens_stat$Mean, obs$q_maf, xlab = "Ensemble Mean", ylab = "Observed")
  abline(a=0, b=1, col='red')
  legend('bottomright', legend=c(paste('R =',round(corr,2), sep=''),
                                 paste('RPSS =', round(rpss,2), sep='')), cex=1.1)
  
  rmse = sqrt(mean((ens_stat$Median-obs$q_maf)^2))
  corr = cor(obs$q_maf, ens_stat$Median)
  
  plot(ens_stat$Median, obs$q_maf, xlab = "Ensemble Median", ylab = "")
  abline(a=0, b=1, col='red')
  legend('bottomright', legend=c(paste('R =',round(corr,2), sep=''),
                                 paste('RPSS =', round(rpss,2), sep='')), cex=1)
  
  if (j == 2){
    title(main = expression(paste("Forecasted Spring Flow for March ", 1^{"st"} ," Lead Time")), outer = T)
  }
  else {
    title(main = expression(paste("Forecasted Spring Flow for April ", 1^{"st"} ," Lead Time")), outer = T)
  }
}

Comments