HW3_Prob6

6. Modeling Space-Time Nonstationary Extreme Value Analysis (EVA)
Perform a space-time nonstationary EVA on the winter 3-day maximum precipitation. Below are the steps:
At each location fit a nonstationary GEV model to the 3-day winter maximum precipitation as function of the 3 covariates - the leading ~3 winter SST PCs. Make only the location parameter of the GEV nonstationary. This will result in 4 coefficients (intercept plus the three covariates)

Fit a spatial model to each of the coefficients and to the scale and shape parameter
For a couple of wet and dry years (you can select the years based on the average spatial 3-day precipitation) - obtain the 2-year, 50-year and 100-year return levels on the spatial grid
1. Using spatial models obtain the GEV parameters at each grid point
2. Estimate the 2-year, 50-year and 100-year return levels at each grid point and map them. Compare with 2-year return levels with the observed values in the selected years
For a couple of representative locations plot the time series of 3-day precipitation maximum along with the time varying return levels. Compare them with the stationary return levels

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 03/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw3_library.R"))
source("hw3_library.R")
set.seed(3000)

Obtain leading ~3 winter SST PCs

nrows=72
ncols=36
ntime = 118    #Nov-Mar 1901 - Nov-Mar 2018
nyrs = 55       # Nov-Mar 1964 - Nov-Mar 2018

nglobe = nrows*ncols
N = nrows*ncols

### Lat - Long grid..
locs=matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/sst-lat-long.txt"), ncol=2, byrow=T)
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid)

xgrid=seq(27.5,382.5,by=5)
#xgrid[xgrid > 180]=xgrid[xgrid > 180]-360  #longitude on 0-360 grid if needed
xgrid[xgrid > 180]=xgrid[xgrid > 180]
nx=length(xgrid)

xygrid=matrix(0,nrow=nx*ny,ncol=2)

i=0
for(iy in 1:ny){
  for(ix in 1:nx){
    i=i+1
    xygrid[i,1]=ygrid[iy]
    xygrid[i,2]=xgrid[ix]
  }
  
}

Read Kaplan SST data

data=readBin(paste(mainDir, "data/prob6/data.r4", sep = ""),what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data = data[,,64:ntime]

data1=data[,,1]

# the lat -long data grid..

index=1:(nx*ny)

index1=index[data1 < 20 & data1 != "NaN"]   # only non-missing data.
xygrid1=xygrid[index1,]

x1=xygrid1[,2]
#x1[x1 < 0]= x1[x1 < 0] + 360
#xygrid1[,2]=x1

nsites=length(index1)
data2=data1[index1]

### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=nyrs, ncol=nsites)

for(i in 1:nyrs){
  data1=data[,,i]
  index1=index[data1 < 20 & data1 != "NaN"]
  data2=data1[index1]
  sstdata[i,]=data2
}

sstannavg = sstdata
indexgrid = index1
rm("data")  #remove the object data to clear up space

## write out the grid locations..
write(t(xygrid1),file="kaplan-sst-locs.txt",ncol=2)

Compute leading ~3 winter SST PCs

#get variance matrix..
## scale the data
sstscale = scale(sstannavg)
zs=var(sstscale)

#do an Eigen decomposition..
zsvd=svd(zs)

#Principal Components...
sst_pcs=t(t(zsvd$u) %*% t(sstscale))

Read in 3-day winter maximum precipitation data

data = read.table(paste(mainDir, "data/prob6/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
loc = read.table(paste(mainDir, "data/prob6/Precipitaton_data.txt", sep = ""), header = T)
lon = loc[,1]
lat = loc[,2]
xs = cbind(lon,lat)

nloc = nrow(loc)
years = data[,1]
precip = data[,2:ncol(data)]

# Read in high-resolution grid locations
elev_grid=read.delim(paste(mainDir, "/data/prob6/Elevation_grid_1deg.txt", sep = ""), header = TRUE, sep = "\t", dec = ".")
names(elev_grid) = c("Long","Lat","Elev")
lon = elev_grid[,1]
lat = elev_grid[,2]
xps = cbind(lon,lat)

nloch = nrow(elev_grid)

At each location fit a nonstationary GEV model to the 3-day winter maximum precipitation as function of the 3 covariates - the leading 3 winter SST PCs

Make only the location parameter of the GEV nonstationary. This will result in 4 coefficients (intercept plus the three covariates)

dat = sst_pcs[,1:3]
dat = as.data.frame(dat)
names(dat) = c("PC1","PC2","PC3")
param = matrix(data = NA, nrow = nloc, ncol = 6)
for (i in 1:nloc) {
  prec = precip[,i]
  dat$prec = prec
  fit.gev <- fevd(prec,dat,location.fun = ~PC1+PC2+PC3, units = "mm", use.phi = T)
  param[i,] = fit.gev$results$par
}
param.full = as.data.frame(param)
names(param.full) = c("mu_0","mu_1","mu_2","mu_3","sigma","xi")

Fit a spatial model to each of the coefficients and to the scale and shape parameter

spatial = matrix(data = NA, nrow = nloch, ncol = ncol(param))      # store coefficients for 1deg locations
spatial.SE = matrix(data = NA, nrow = nloch, ncol = ncol(param))      # store standard error for each location
for (i in 1:ncol(param)){
  # Compute variogram
  look <- vgram(xs, param[,i], N = 10, lon.lat = T)
  # Fit variogram using nonlinear least squares
  xd <- look$centers
  ym <- look$stats[2,]
  tau2 <- 0     # Nugget
  nls.fit <- nls(ym ~ tau2 + sigma*(1-exp(-xd/theta)), start = list(sigma = max(look$stats[2,]), theta = quantile(xd,0.1)), control = list(maxiter = 50000, minFactor = 1e-16))
  pars <- coefficients(nls.fit)
  sigma <- pars[1]
  theta <- pars[2]
  xr = round(max(xd)+sd(xd))
  # Predict coefficient over high-resolution grid
  mod = Krig(xs, param[,i], sigma = sigma, theta = theta, m = 1, tau2 = tau2)
  ghat = predict.Krig(mod, x = xps, drop.Z = T)
  kse = predictSE.Krig(mod, x = xps)
  
  spatial[,i] = ghat
  spatial.SE[,i] = kse
}

For a couple of wet and dry years (you can select the years based on the average spatial 3-day precipitation) - obtain the 2-year, 50-year and 100-year return levels on the spatial grid

### Select wet and dry years
# Omit extreme values
non_values <- which(precip > 1000, arr.ind = T)
precip[non_values] = NaN

prec_annavg = rowMeans(precip)
yr_dry = years[which.min(prec_annavg)]
print(sprintf("The selected dry year is %s", yr_dry))

## [1] "The selected dry year is 1972"

yr_wet = years[which.max(prec_annavg)]
print(sprintf("The selected wet year is %s", yr_wet))

## [1] "The selected wet year is 1993"

(i) Using the spatial models obtain the GEV parameters at each grid point

Plot GEV parameters and standard error on high-resolution grid

r_pal=rev(brewer.pal(10,"RdBu"))
xlim1=c(-115,-101.3)
ylim1=c(29.5,42.5)
namex=seq(from=-114,to=-102,by=2)
labx=paste(abs(namex[1]),"\u00B0W",sep = "")
for (i in 2:length(namex)) {
  labx=c(labx,paste(abs(namex[i]),"\u00B0W",sep = ""))
}
namey=seq(from=30,to=42,by=2)
laby=paste(abs(namey[1]),"\u00B0N",sep = "")
for (i in 2:length(namey)) {
  laby=c(laby,paste(abs(namey[i]),"\u00B0N",sep = ""))
}

for (i in 1:ncol(param)){
  name = colnames(param.full)[i]
  par(.pardefault)
  plot_sp_grid(param[,i],spatial[,i],spatial.SE[,i],loc,elev_grid,name,xlim1,ylim1,namex,namey,labx,laby,r_pal)
}

(ii) Estimate the 2-year, 5-year, and 100-year return levels at each grid point and map them. Compare with the observed values in the selected years.

period=c(2,50,100)  # return periods considered
ind_yrs = c(which(years==yr_dry),which(years==yr_wet))
estimate=list(T2=vector("list", length = nloc), T50=vector("list", length = nloc), T100=vector("list", length = nloc))

for (i in 1:length(ind_yrs)){
  par( oma=c(0,0,3,1)) # save some room for the legend
  par(mfrow = c(2, 2))
  par(mar = c(4.5, 4.7, 2, 1.5))
  
  # Calculate return levels
  for (j in 1:nloch){
    loca = as.matrix(dat[ind_yrs[i],1:3]) %*% spatial[j,2:4] + spatial[j,1]
    scal = exp(spatial[j,5])
    shap = spatial[j,6]
    tmp1 = qgev(1 - 1/period, xi = shap, mu = loca, beta = scal, lower.tail = T)
    estimate$T2[j] = tmp1[1]
    estimate$T50[j] = tmp1[2]
    estimate$T100[j] = tmp1[3]
  }
  # Plot observed precipitation for chosen year
  quilt.plot(loc[,1],loc[,2],as.numeric(precip[ind_yrs[i],]),axes=F,xlab="Longitude",ylab="Latitude",main="Observed",ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  # Plot 2-year return level
  quilt.plot(elev_grid[,1],elev_grid[,2],as.numeric(estimate$T2),axes=F,xlab="Longitude",ylab="Latitude",main="Estimated 2-year return level", ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  zlim1 = c(range(estimate$T50)[1],120)
  # Plot 50-year return level
  quilt.plot(elev_grid[,1],elev_grid[,2],as.numeric(estimate$T50),axes=F,xlab="Longitude",ylab="Latitude",main="Estimated 50-year return level",zlim = zlim1, ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  zlim1 = c(range(estimate$T100)[1],150)
  # Plot 100-year return level
  quilt.plot(elev_grid[,1],elev_grid[,2],as.numeric(estimate$T100),axes=F,xlab="Longitude",ylab="Latitude",main="Estimated 100-year return level",zlim = zlim1, ylim=ylim1,xlim=xlim1,col=r_pal,cex.main=1,cex.lab=1.3)
  US(add=TRUE, col="gray30", lwd=2)
  box()
  axis(1, at=namex, labels=labx)
  axis(2, at=namey, labels=laby)
  
  if (i == 1){
    mtext(paste("Winter 3-day Maximum Precipitation for the Driest Year (",years[ind_yrs[i]],"), (mm)",sep = ""), outer = TRUE, side = 3, cex = 1, line = 1)
  }
  else {
    mtext(paste("Winter 3-day Maximum Precipitation for the Wettest Year (",years[ind_yrs[i]],"), (mm)",sep = ""), outer = TRUE, side = 3, cex = 1, line = 1)
  }
  
}

For a couple of representative locations plot the time series of 3-day precipitation maximum along with the time-varying return levels. Compare them with the stationary return levels.

prec_locavg = colMeans(precip)
ind_dry = which.min(prec_locavg)
print(sprintf("The driest location is %s", colnames(precip)[ind_dry]))

## [1] "The driest location is USW00023061"

ind_wet = which.max(prec_locavg)
print(sprintf("The wettest location is %s", colnames(precip)[ind_wet]))

## [1] "The wettest location is USC00023448"

ind_loc = c(ind_dry, ind_wet)
par(.pardefault)
timeser = list(T2=vector("list", length = nyrs), T50=vector("list", length = nyrs), T100=vector("list", length = nyrs))
  
for (i in 1:length(ind_loc)){
  par(oma=c(0,0,3,1)) # save some room for the legend
  par(mar = c(4.5, 4.7, 2, 1.5))
  
  # Calculate return levels
  for (j in 1:nyrs){
    loca = as.matrix(dat[j,1:3]) %*% param[ind_loc[i],2:4] + param[ind_loc[i],1]
    scal = exp(param[ind_loc[i],5])
    shap = param[ind_loc[i],6]
    tmp1 = qgev(1 - 1/period, xi = shap, mu = loca, beta = scal, lower.tail = T)
    timeser$T2[j] = tmp1[1]
    timeser$T50[j] = tmp1[2]
    timeser$T100[j] = tmp1[3]
  }
  
  obs <- data.frame(cbind(years,precip[,ind_loc[i]]))
  t2 <- data.frame(cbind(years,as.numeric(timeser$T2)))
  t50 <- data.frame(cbind(years,as.numeric(timeser$T50)))
  t100 <- data.frame(cbind(years,as.numeric(timeser$T100)))
  
  ### Plot time series of observed precipitation and return levels for chosen location and compare with stationary levels
  prec = precip[,ind_loc[i]]
  dat$prec = prec
  fit.gevS <- fevd(prec,dat, units = "mm")
  return_stat2 = return.level.fevd(fit.gevS,return.period = c(2))[1]
  return_stat50 = return.level.fevd(fit.gevS,return.period = c(50))[1]
  return_stat100 = return.level.fevd(fit.gevS,return.period = c(100))[1]
  
  tsplot <- ggplot(obs, aes(obs$years, obs$V2)) + geom_line(aes(color = "black")) +
    geom_line(aes(t2$years,t2$V2, color = "red")) +
    geom_line(aes(t50$years,t50$V2, color = "blue")) +
    geom_line(aes(t100$years,t100$V2,color = "green")) +
    geom_abline(intercept = return_stat2, slope = 0, colour = "grey") +
    geom_abline(intercept = return_stat50, slope = 0, colour = "grey") +
    geom_abline(intercept = return_stat100, slope = 0, colour = "grey") +
    xlab("Year") + ylab("Precipitation (mm)") +
    scale_colour_manual(name = "Time Series", values = c("black"="black",'red'='red','blue'='blue','green'='green',"grey"="grey"), labels = c('Observed', '2-Year', '50-Year',"100-Year","Stationary"))
  
  if (i == 1){
    tsplot <- tsplot + labs(title = paste("Winter 3-day Maximum Precipitation for the Driest Location (",colnames(precip)[ind_loc[i]],"), (mm)",sep = "")) +
      theme(plot.title = element_text(size = 10))
  }
  else {
    tsplot <- tsplot + labs(title = paste("Winter 3-day Maximum Precipitation for the Wettest Location (",colnames(precip)[ind_loc[i]],"), (mm)",sep = ""))+
      theme(plot.title = element_text(size = 10))
  }
  
  show(tsplot)
  
}

Comments

For the driest year (1972), most of Arizona experienced less precipitation as the 2-year return level where the mountainous region of Colorado experienced greater precipitation than the 2-year return level. These patterns continue in the 50- and 100-year return levels
For the wettest year (1993), most of the southwest US experienced precipitation similar to the estimated 2-year return level with the exception of a couple of locations in Arizona, which experienced significantly greater precipitation than the 2- and 50-year return levels. Spatial patterns persisted through all of the return level years.
The nonstationary return levels remain relatively stable in time and match closely with the stationary return levels for both the wettest and driest location
The 2-year nonstationary return levels capture the mean of the observed precipitation while the 50- and 100-year nonstationary return levels suggest increasing risk levels for both locations