6. Modeling Space-Time Nonstationary Extreme Value Analysis (EVA)
Perform a space-time nonstationary EVA on the winter 3-day maximum precipitation. Below are the steps:
At each location fit a nonstationary GEV model to the 3-day winter maximum precipitation as function of the 3 covariates - the leading ~3 winter SST PCs. Make only the location parameter of the GEV nonstationary. This will result in 4 coefficients (intercept plus the three covariates)

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 03/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw3_library.R"))
source("hw3_library.R")
set.seed(3000)

Obtain leading ~3 winter SST PCs

nrows=72
ncols=36
ntime = 118    #Nov-Mar 1901 - Nov-Mar 2018
nyrs = 55       # Nov-Mar 1964 - Nov-Mar 2018

nglobe = nrows*ncols
N = nrows*ncols

### Lat - Long grid..
locs=matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/sst-lat-long.txt"), ncol=2, byrow=T)
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid)

xgrid=seq(27.5,382.5,by=5)
#xgrid[xgrid > 180]=xgrid[xgrid > 180]-360  #longitude on 0-360 grid if needed
xgrid[xgrid > 180]=xgrid[xgrid > 180]
nx=length(xgrid)

xygrid=matrix(0,nrow=nx*ny,ncol=2)

i=0
for(iy in 1:ny){
  for(ix in 1:nx){
    i=i+1
    xygrid[i,1]=ygrid[iy]
    xygrid[i,2]=xgrid[ix]
  }
  
}

Read Kaplan SST data

data=readBin(paste(mainDir, "data/prob6/data.r4", sep = ""),what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data = data[,,64:ntime]

data1=data[,,1]

# the lat -long data grid..

index=1:(nx*ny)

index1=index[data1 < 20 & data1 != "NaN"]   # only non-missing data.
xygrid1=xygrid[index1,]

x1=xygrid1[,2]
#x1[x1 < 0]= x1[x1 < 0] + 360
#xygrid1[,2]=x1

nsites=length(index1)
data2=data1[index1]

### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=nyrs, ncol=nsites)

for(i in 1:nyrs){
  data1=data[,,i]
  index1=index[data1 < 20 & data1 != "NaN"]
  data2=data1[index1]
  sstdata[i,]=data2
}

sstannavg = sstdata
indexgrid = index1
rm("data")  #remove the object data to clear up space

## write out the grid locations..
write(t(xygrid1),file="kaplan-sst-locs.txt",ncol=2)

Compute leading ~3 winter SST PCs

#get variance matrix..
## scale the data
sstscale = scale(sstannavg)
zs=var(sstscale)

#do an Eigen decomposition..
zsvd=svd(zs)

#Principal Components...
sst_pcs=t(t(zsvd$u) %*% t(sstscale))

Read in 3-day winter maximum precipitation data

data = read.table(paste(mainDir, "data/prob6/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
loc = read.table(paste(mainDir, "data/prob6/Precipitaton_data.txt", sep = ""), header = T)
lon = loc[,1]
lat = loc[,2]
xs = cbind(lon,lat)

nloc = nrow(loc)
years = data[,1]
precip = data[,2:ncol(data)]

# Read in high-resolution grid locations
elev_grid=read.delim(paste(mainDir, "/data/prob6/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
xps = cbind(lon,lat)

nloch = nrow(elev_grid)

At each location fit a nonstationary GEV model to the 3-day winter maximum precipitation as function of the 3 covariates - the leading 3 winter SST PCs

Make only the location parameter of the GEV nonstationary. This will result in 4 coefficients (intercept plus the three covariates)

dat = sst_pcs[,1:3]
dat = as.data.frame(dat)
names(dat) = c("PC1","PC2","PC3")
param = matrix(data = NA, nrow = nloc, ncol = 6)
for (i in 1:nloc) {
  prec = precip[,i]
  dat$prec = prec
  fit.gev <- fevd(prec,dat,location.fun = ~PC1+PC2+PC3, units = "mm", use.phi = T)
  param[i,] = fit.gev$results$par
}
param.full = as.data.frame(param)
names(param.full) = c("mu_0","mu_1","mu_2","mu_3","sigma","xi")

Fit a spatial model to each of the coefficients and to the scale and shape parameter

spatial = matrix(data = NA, nrow = nloch, ncol = ncol(param))      # store coefficients for 1deg locations
spatial.SE = matrix(data = NA, nrow = nloch, ncol = ncol(param))      # store standard error for each location
for (i in 1:ncol(param)){
  # Compute variogram
  look <- vgram(xs, param[,i], N = 10, lon.lat = T)
  # Fit variogram using nonlinear least squares
  xd <- look$centers
  ym <- look$stats[2,]
  tau2 <- 0     # Nugget
  nls.fit <- nls(ym ~ tau2 + sigma*(1-exp(-xd/theta)), start = list(sigma = max(look$stats[2,]), theta = quantile(xd,0.1)), control = list(maxiter = 50000, minFactor = 1e-16))
  pars <- coefficients(nls.fit)
  sigma <- pars[1]
  theta <- pars[2]
  xr = round(max(xd)+sd(xd))
  # Predict coefficient over high-resolution grid
  mod = Krig(xs, param[,i], sigma = sigma, theta = theta, m = 1, tau2 = tau2)
  ghat = predict.Krig(mod, x = xps, drop.Z = T)
  kse = predictSE.Krig(mod, x = xps)
  
  spatial[,i] = ghat
  spatial.SE[,i] = kse
}

For a couple of wet and dry years (you can select the years based on the average spatial 3-day precipitation) - obtain the 2-year, 50-year and 100-year return levels on the spatial grid

### Select wet and dry years
# Omit extreme values
non_values <- which(precip > 1000, arr.ind = T)
precip[non_values] = NaN

prec_annavg = rowMeans(precip)
yr_dry = years[which.min(prec_annavg)]
print(sprintf("The selected dry year is %s", yr_dry))
## [1] "The selected dry year is 1972"
yr_wet = years[which.max(prec_annavg)]
print(sprintf("The selected wet year is %s", yr_wet))
## [1] "The selected wet year is 1993"

(i) Using the spatial models obtain the GEV parameters at each grid point

Plot GEV parameters and standard error on high-resolution grid

r_pal=rev(brewer.pal(10,"RdBu"))
xlim1=c(-115,-101.3)
ylim1=c(29.5,42.5)
namex=seq(from=-114,to=-102,by=2)
labx=paste(abs(namex[1]),"\u00B0W",sep = "")
for (i in 2:length(namex)) {
  labx=c(labx,paste(abs(namex[i]),"\u00B0W",sep = ""))
}
namey=seq(from=30,to=42,by=2)
laby=paste(abs(namey[1]),"\u00B0N",sep = "")
for (i in 2:length(namey)) {
  laby=c(laby,paste(abs(namey[i]),"\u00B0N",sep = ""))
}
for (i in 1:ncol(param)){
  name = colnames(param.full)[i]
  par(.pardefault)
  plot_sp_grid(param[,i],spatial[,i],spatial.SE[,i],loc,elev_grid,name,xlim1,ylim1,namex,namey,labx,laby,r_pal)
}

(ii) Estimate the 2-year, 5-year, and 100-year return levels at each grid point and map them. Compare with the observed values in the selected years.

period=c(2,50,100)  # return periods considered
ind_yrs = c(which(years==yr_dry),which(years==yr_wet))
estimate=list(T2=vector("list", length = nloc), T50=vector("list", length = nloc), T100=vector("list", length = nloc))

for (i in 1:length(ind_yrs)){
  par( oma=c(0,0,3,1)) # save some room for the legend
  par(mfrow = c(2, 2))
  par(mar = c(4.5, 4.7, 2, 1.5))
  
  # Calculate return levels
  for (j in 1:nloch){
    loca = as.matrix(dat[ind_yrs[i],1:3]) %*% spatial[j,2:4] + spatial[j,1]
    scal = exp(spatial[j,5])
    shap = spatial[j,6]
    tmp1 = qgev(1 - 1/period, xi = shap, mu = loca, beta = scal, lower.tail = T)
    estimate$T2[j] = tmp1[1]
    estimate$T50[j] = tmp1[2]
    estimate$T100[j] = tmp1[3]
  }
  # Plot observed precipitation for chosen year
  quilt.plot(loc[,1],loc[,2],as.numeric(precip[ind_yrs[i],]),axes=F,xlab="Longitude",ylab="Latitude",main="Observed",ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  # Plot 2-year return level
  quilt.plot(elev_grid[,1],elev_grid[,2],as.numeric(estimate$T2),axes=F,xlab="Longitude",ylab="Latitude",main="Estimated 2-year return level", ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  zlim1 = c(range(estimate$T50)[1],120)
  # Plot 50-year return level
  quilt.plot(elev_grid[,1],elev_grid[,2],as.numeric(estimate$T50),axes=F,xlab="Longitude",ylab="Latitude",main="Estimated 50-year return level",zlim = zlim1, ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  zlim1 = c(range(estimate$T100)[1],150)
  # Plot 100-year return level
  quilt.plot(elev_grid[,1],elev_grid[,2],as.numeric(estimate$T100),axes=F,xlab="Longitude",ylab="Latitude",main="Estimated 100-year return level",zlim = zlim1, ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  if (i == 1){
    mtext(paste("Winter 3-day Maximum Precipitation for the Driest Year (",years[ind_yrs[i]],"), (mm)",sep = ""), outer = TRUE, side = 3, cex = 1, line = 1)
  }
  else {
    mtext(paste("Winter 3-day Maximum Precipitation for the Wettest Year (",years[ind_yrs[i]],"), (mm)",sep = ""), outer = TRUE, side = 3, cex = 1, line = 1)
  }
  
}

For a couple of representative locations plot the time series of 3-day precipitation maximum along with the time-varying return levels. Compare them with the stationary return levels.

prec_locavg = colMeans(precip)
ind_dry = which.min(prec_locavg)
print(sprintf("The driest location is %s", colnames(precip)[ind_dry]))
## [1] "The driest location is USW00023061"
ind_wet = which.max(prec_locavg)
print(sprintf("The wettest location is %s", colnames(precip)[ind_wet]))
## [1] "The wettest location is USC00023448"
ind_loc = c(ind_dry, ind_wet)
par(.pardefault)
timeser = list(T2=vector("list", length = nyrs), T50=vector("list", length = nyrs), T100=vector("list", length = nyrs))
  
for (i in 1:length(ind_loc)){
  par(oma=c(0,0,3,1)) # save some room for the legend
  par(mar = c(4.5, 4.7, 2, 1.5))
  
  # Calculate return levels
  for (j in 1:nyrs){
    loca = as.matrix(dat[j,1:3]) %*% param[ind_loc[i],2:4] + param[ind_loc[i],1]
    scal = exp(param[ind_loc[i],5])
    shap = param[ind_loc[i],6]
    tmp1 = qgev(1 - 1/period, xi = shap, mu = loca, beta = scal, lower.tail = T)
    timeser$T2[j] = tmp1[1]
    timeser$T50[j] = tmp1[2]
    timeser$T100[j] = tmp1[3]
  }
  
  obs <- data.frame(cbind(years,precip[,ind_loc[i]]))
  t2 <- data.frame(cbind(years,as.numeric(timeser$T2)))
  t50 <- data.frame(cbind(years,as.numeric(timeser$T50)))
  t100 <- data.frame(cbind(years,as.numeric(timeser$T100)))
  
  ### Plot time series of observed precipitation and return levels for chosen location and compare with stationary levels
  prec = precip[,ind_loc[i]]
  dat$prec = prec
  fit.gevS <- fevd(prec,dat, units = "mm")
  return_stat2 = return.level.fevd(fit.gevS,return.period = c(2))[1]
  return_stat50 = return.level.fevd(fit.gevS,return.period = c(50))[1]
  return_stat100 = return.level.fevd(fit.gevS,return.period = c(100))[1]
  
  tsplot <- ggplot(obs, aes(obs$years, obs$V2)) + geom_line(aes(color = "black")) +
    geom_line(aes(t2$years,t2$V2, color = "red")) +
    geom_line(aes(t50$years,t50$V2, color = "blue")) +
    geom_line(aes(t100$years,t100$V2,color = "green")) +
    geom_abline(intercept = return_stat2, slope = 0, colour = "grey") +
    geom_abline(intercept = return_stat50, slope = 0, colour = "grey") +
    geom_abline(intercept = return_stat100, slope = 0, colour = "grey") +
    xlab("Year") + ylab("Precipitation (mm)") +
    scale_colour_manual(name = "Time Series", values = c("black"="black",'red'='red','blue'='blue','green'='green',"grey"="grey"), labels = c('Observed', '2-Year', '50-Year',"100-Year","Stationary"))
  
  if (i == 1){
    tsplot <- tsplot + labs(title = paste("Winter 3-day Maximum Precipitation for the Driest Location (",colnames(precip)[ind_loc[i]],"), (mm)",sep = "")) +
      theme(plot.title = element_text(size = 10))
  }
  else {
    tsplot <- tsplot + labs(title = paste("Winter 3-day Maximum Precipitation for the Wettest Location (",colnames(precip)[ind_loc[i]],"), (mm)",sep = ""))+
      theme(plot.title = element_text(size = 10))
  }
  
  show(tsplot)
  
}

Comments