Joint Spatial Analysis & Field Forecasting using CCA

One of the objectives of multivariate analysis is to enable multivariate forecasting. Here we wish to predict the winter precipitation over Arizona as a function of winter Pacific SSTs. We use CCA for this and the simplified steps are described below.

  1. Take the leading 5 PCs of the winter SSTs [which you can obtain following the procedure in problem 3] and perform CCA with the leading 5 PCs of winter precipitation.

  2. Fit a regression for each canonical variate - canonical variate of precipitation related to canonical variate of SSTs. Use these regressions to predict the flow variates and back transform them to the original space - i.e., the precipitation space.

  3. Evaluate the performance by computing R2 between the observed and predicted summer precipitation at each grid point.

  4. Use the model to predict the maximum precipitation for the last two years.

#### CLEAR MEMORY
rm(list=ls())
graphics.off()
#### Prevent Warnings
options(warn=-1)

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW2.R"))
source("Lib_HW2.R")
# unload packages that create a conflict with map("world2",add=T)
detach(package:mclust)
detach(package:sm)

Steps to create Pacific SSTs data

Lat - Long grid creation

# information obtained from: http://iridl.ldeo.columbia.edu/SOURCES/.KAPLAN/.EXTENDED/.v2/ 
ntime = 118    #Nov-Mar 1901 - Nov-Mar 2018

### Lat - Long grid..
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid) # number of latitudinal locations

xgrid=seq(27.5,382.5,by=5)
nx=length(xgrid) # number of longitudinal locations

nglobe = nx*ny
# creation of spatial grid
xygrid=matrix(0,nrow=nx*ny,ncol=2)
i=0
for(iy in 1:ny){
  for(ix in 1:nx){
    i=i+1
    xygrid[i,1]=ygrid[iy]
    xygrid[i,2]=xgrid[ix]
  }
}

Read Kaplan SST data..

data=readBin("./data/Kaplan-SST-NDJFM1900-NDJFM2018.r4",what="numeric", n=( nx * ny * ntime), size=4,endian="swap")

data <- array(data = data, dim=c( nx, ny, ntime ) )
data1=data[,,1]

# the lat -long data grid..

index=1:(nx*ny)

index1=index[data1 < 20 & data1 != "NaN"]   # only non-missing data.
xygrid1=xygrid[index1,]

nsites=length(index1)

### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstannavg=matrix(NA,nrow=ntime, ncol=nsites)

for(i in 1:ntime){
  data1=data[,,i]
  index1=index[data1 < 20 & data1 != "NaN"]
  data2=data1[index1]
  sstannavg[i,]=data2
}  

indexgrid = index1
rm("data")  #remove the object data to clear up space

## Index of locations corresponding to Pacific 
sstdata=sstannavg
xlongs = xygrid1[,2]
ylats = xygrid1[,1]
indexpac = indexgrid[ ylats >= -20 & xlongs >= 105 & xlongs <= 290]


xlong = xlongs[ylats >= -20 &  xlongs >= 105 & xlongs <= 290]
ylat = ylats[ylats >= -20 &  xlongs >= 105 & xlongs <= 290]

index1=1:length(xlongs)
index = index1[ ylats >= -20 & xlongs >= 105 & xlongs <= 290]

sstannavg = sstdata[,index]#Nov-Mar 1901 - Nov-Mar 2018
indexgrid = indexpac
lon = sort(unique(xygrid[,2]))
lat = sort(unique(xygrid[,1]))

Read Western US grid and winter precipitation data

wuplocs = matrix(scan("./data/WesternUS-coords.txt"), ncol=5,byrow=T)
nlocs = dim(wuplocs)[1]
xlats = wuplocs[,3]
xlongs = -wuplocs[,4]

nlocs1=nlocs+1
winterprecip = matrix(scan("./data/western-US-winterprecip-1901-2014.txt"),ncol=nlocs1,byrow=T)
years = winterprecip[,1]
N = length(years) #Nov-Mar 1901 - Nov-Mar 2013 
winterprecip = winterprecip[,2:nlocs1]      #first column is year

nlevel=11
coltab=rev(brewer.pal(nlevel-1,'RdBu'))
zr=range(winterprecip[N,])
break_a=round(seq(zr[1],zr[2],by=((zr[2]-zr[1])/(nlevel-1))))
quilt.plot(xlongs, xlats,winterprecip[N,] ,xlim=range(-125,-100),ylim=c(31.3,49.2),col=coltab,main = "2013 winter precipitation Western-US",zlim=zr,lab.breaks=break_a)
grid(col="gray70",lty=2)
US(add=TRUE, col="gray50", lwd=2,xlim=range(-125,-100))
box()

## filter the Arizona winter precipitation 
ind=1:length(xlats)
ind1 = ind[xlats>=32 & xlats<=37 & xlongs >= -115 & xlongs <= -109]
xlon=xlongs[ind1]
xlat = xlats[ind1]
winter_prec_AZ=winterprecip[,ind1]

zr=range(winter_prec_AZ[N,])
break_a=round(seq(zr[1],zr[2],by=((zr[2]-zr[1])/(nlevel-1)))*10)/10
quilt.plot(xlon, xlat,winter_prec_AZ[N,] ,xlim=range(-115,-109),ylim=c(31.3,37.05),nx=30, ny=30,col=coltab,main = "2013 winter precipitation Arizona",zlim=zr,lab.breaks=break_a)
grid(col="gray70",lty=2)
US(add=TRUE, col="gray50", lwd=2,xlim=range(-125,-100))
box()

##PCA of Pacific SSTs

##########################################################
sstannavg2=sstannavg[1:N,] #SSTs Nov-Mar 1901 - Nov-Mar 2013
sstannavg1 = scale(sstannavg2)
zs=var(sstannavg1)

#do an Eigen decomposition..
zsvd=svd(zs)

ssteof = zsvd$u

#Principal Components...
pcs= sstannavg1 %*% zsvd$u

#Eigen Values.. - fraction variance 
lambdas=(zsvd$d/sum(zsvd$d))

npc = 5     # number of PCs to use for CCA
sstPC = pcs[,1:npc]
sstpcfull = pcs

PCA of Arizona winter precipitation

precMean = apply(winter_prec_AZ, 2, mean)
precSd = apply(winter_prec_AZ, 2, sd)


precip=scale(winter_prec_AZ)
zs=var(precip)

#do an Eigen decomposition..
zsvd=svd(zs)

preceof = zsvd$u

#Principal Components...
pcs=precip %*% zsvd$u
precpcfull=pcs
precPC = pcs[,1:npc]

CCA of both Pacific SSTs and Arizona winter precipitation

M=dim(sstPC)[2]
J=dim(precPC)[2]
J=min(M,J)
N = length(precPC[,1])
Qx1 = qr.Q(qr(sstPC))
Qy1 = qr.Q(qr(precPC))
T11 = qr.R(qr(sstPC))
T22 = qr.R(qr(precPC))
VV=t(Qx1) %*% Qy1
# BB = svd(VV)$v
# AA = svd(VV)$u
BB = solve(T22) %*% svd(VV)$v * sqrt(N-1)
wm1 = precPC %*% BB
AA = solve(T11) %*% svd(VV)$u * sqrt(N-1)
vm1 = sstPC %*% AA

Forecast #Nov-Mar 2017 - Nov-Mar 2018

sstannavg2=sstannavg[117:118,] # SST Nov-Mar 2017 - Nov-Mar 2018
#PCA of SSTs
sstpc1718 = sstannavg2 %*% ssteof
sstpc1718 = sstpc1718[,1:npc]

## predict the PCs
ypred1718=sstpc1718 %*% AA %*% betahat

### first npc PCs from the PC forecast above and the remaining PCs are set to
## their means - i.e., 0
precPCpred = cbind(ypred1718,matrix(rep(0,2),ncol=N1,nrow=2))

## back transform to get the winter precipitation field
precpred =  precPCpred %*% t(E)

### rescale the winter precipitation
precpred=t(t(precpred)*precSd + precMean)

Comments

  • According to the R2 between the observed and predicted winter precipitation at each grid point, the model seems to have a better performance in the south of the state.

  • In the case of the maximum precipitation for the last two years, the maximum value of precipitation occurs in 2017 at the middle east of the state, and its value is 1.8 (I do not know the units of this value).

  • In both years maximum values are in the north east of the state.