1. K-NN Conditional Ensemble Forecast/Simulation
Use K-NN bootstrapping approach to simulate/forecast Spring streamflow on the Colorado River at Lees Ferry based on suite of predictors, also called, ‘feature vectors’, at two lead times - Mar 1 and Apr 1. The feature vector includes - EPS ensemble mean forecast, winter climate indices (AMO and PDO) and SWE. You can experiment with all or subset of these variables.
The steps for each lead time are:
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 03/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw3_library.R"))
source("hw3_library.R")
set.seed(10)
swe = as.matrix(readRDS("./data/prob1/springSWE_inches.Rds")) # Snow water equivalent @ Mar 1 & Apr 1
clim = as.matrix(readRDS("./data/prob1/winterMeanClimateIndices.Rds")) # AMO, PDO, ENSO
data = as.matrix(readRDS("./data/prob1/springFlows.Rds")) # Spring flow @ Lees Ferry
years = swe[,1]
clim = clim[-which(clim[,1] < years[1]),] # only include years with full dataset
data = data[-which(data[,1] < years[1]),] # only include years with full dataset
obs = as.data.frame(data)
q_maf = obs$q_maf
terc = quantile(q_maf, c(1/3, 2/3))
terc_obs = rep(NA, length(q_maf))
for (i in 1:length(q_maf)){
if ( q_maf[i] < terc[1]){
terc_obs[i]=1
} else if ( q_maf[i] >= terc[1] & q_maf[i] < terc[2] ) {
terc_obs[i]=2
} else {
terc_obs[i]=3
}
}
climatology=c(1/3, 1/3, 1/3)
nyrs = length(years)
nsim = 250 # number of simulations
# create matrix to store forecast data
ens_stat = as.data.frame(matrix(NA,nrow=nyrs,ncol=2))
names(ens_stat) = c("Mean","Median")
terc_forecast = matrix(NA,nrow=nyrs,ncol=3)
for (j in 2:3){
par(mfrow = c(1,2), oma = c(2,0,3,1), mar = c(4,2,2,1))
df = cbind(data,clim[,-1],swe[,j]) # construct record
for (i in 1:nyrs){
df.train = as.data.frame(subset(df, years != years[i])) # training dataset
preds = as.data.frame(subset(df, years == years[i])) # year to be predicted
### (i) For each year, t, obtain K-nearest neighbors from the historical feature vectors
neighbs = knn(df.train, preds)
### (ii) Select K neighbors using a weight metric
### (iii) Obtain ensemble simulation of streamflow
sims = neighbor_bootstrapping(neighbs, nsim)
### Compute terciles of predicted historical spring flow
p1 = sum(sims < terc[1])/length(sims)
p3 = sum(sims > terc[2])/length(sims)
p2 = 1 - (p3 + p1)
p=c(p1, p2, p3)
terc_forecast[i,] = p
### Compute mean and median of ensemble
ens_stat[i,1] = mean(sims)
ens_stat[i,2] = median(sims)
}
### (v) Plot the historic flow vs ensemble mean and median forecast and compute skill scores
rmse = sqrt(mean((ens_stat$Mean-obs$q_maf)^2))
rpss = rps(obs=terc_obs, pred=terc_forecast, baseline=climatology)$rpss
corr = cor(obs$q_maf, ens_stat$Mean)
plot(ens_stat$Mean, obs$q_maf, xlab = "Ensemble Mean", ylab = "Observed")
abline(a=0, b=1, col='red')
legend('bottomright', legend=c(paste('R =',round(corr,2), sep=''),
paste('RPSS =', round(rpss,2), sep='')), cex=1.1)
rmse = sqrt(mean((ens_stat$Median-obs$q_maf)^2))
corr = cor(obs$q_maf, ens_stat$Median)
plot(ens_stat$Median, obs$q_maf, xlab = "Ensemble Median", ylab = "")
abline(a=0, b=1, col='red')
legend('bottomright', legend=c(paste('R =',round(corr,2), sep=''),
paste('RPSS =', round(rpss,2), sep='')), cex=1)
if (j == 2){
title(main = expression(paste("Forecasted Spring Flow for March ", 1^{"st"} ," Lead Time")), outer = T)
}
else {
title(main = expression(paste("Forecasted Spring Flow for April ", 1^{"st"} ," Lead Time")), outer = T)
}
}
Comments