Joint Spatial Analysis & Field Forecasting using CCA
One of the objectives of multivariate analysis is to enable multivariate forecasting. Here we wish to predict the winter precipitation over Arizona as a function of winter Pacific SSTs. We use CCA for this and the simplified steps are described below.
Take the leading 5 PCs of the winter SSTs [which you can obtain following the procedure in problem 3] and perform CCA with the leading 5 PCs of winter precipitation.
Fit a regression for each canonical variate - canonical variate of precipitation related to canonical variate of SSTs. Use these regressions to predict the flow variates and back transform them to the original space - i.e., the precipitation space.
Evaluate the performance by computing R2 between the observed and predicted summer precipitation at each grid point.
Use the model to predict the maximum precipitation for the last two years.
#### CLEAR MEMORY
rm(list=ls())
graphics.off()
#### Prevent Warnings
options(warn=-1)
#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW2.R"))
source("Lib_HW2.R")
# unload packages that create a conflict with map("world2",add=T)
detach(package:mclust)
detach(package:sm)
# information obtained from: http://iridl.ldeo.columbia.edu/SOURCES/.KAPLAN/.EXTENDED/.v2/
ntime = 118 #Nov-Mar 1901 - Nov-Mar 2018
### Lat - Long grid..
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid) # number of latitudinal locations
xgrid=seq(27.5,382.5,by=5)
nx=length(xgrid) # number of longitudinal locations
nglobe = nx*ny
# creation of spatial grid
xygrid=matrix(0,nrow=nx*ny,ncol=2)
i=0
for(iy in 1:ny){
for(ix in 1:nx){
i=i+1
xygrid[i,1]=ygrid[iy]
xygrid[i,2]=xgrid[ix]
}
}
data=readBin("./data/Kaplan-SST-NDJFM1900-NDJFM2018.r4",what="numeric", n=( nx * ny * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nx, ny, ntime ) )
data1=data[,,1]
# the lat -long data grid..
index=1:(nx*ny)
index1=index[data1 < 20 & data1 != "NaN"] # only non-missing data.
xygrid1=xygrid[index1,]
nsites=length(index1)
### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstannavg=matrix(NA,nrow=ntime, ncol=nsites)
for(i in 1:ntime){
data1=data[,,i]
index1=index[data1 < 20 & data1 != "NaN"]
data2=data1[index1]
sstannavg[i,]=data2
}
indexgrid = index1
rm("data") #remove the object data to clear up space
## Index of locations corresponding to Pacific
sstdata=sstannavg
xlongs = xygrid1[,2]
ylats = xygrid1[,1]
indexpac = indexgrid[ ylats >= -20 & xlongs >= 105 & xlongs <= 290]
xlong = xlongs[ylats >= -20 & xlongs >= 105 & xlongs <= 290]
ylat = ylats[ylats >= -20 & xlongs >= 105 & xlongs <= 290]
index1=1:length(xlongs)
index = index1[ ylats >= -20 & xlongs >= 105 & xlongs <= 290]
sstannavg = sstdata[,index]#Nov-Mar 1901 - Nov-Mar 2018
indexgrid = indexpac
lon = sort(unique(xygrid[,2]))
lat = sort(unique(xygrid[,1]))
wuplocs = matrix(scan("./data/WesternUS-coords.txt"), ncol=5,byrow=T)
nlocs = dim(wuplocs)[1]
xlats = wuplocs[,3]
xlongs = -wuplocs[,4]
nlocs1=nlocs+1
winterprecip = matrix(scan("./data/western-US-winterprecip-1901-2014.txt"),ncol=nlocs1,byrow=T)
years = winterprecip[,1]
N = length(years) #Nov-Mar 1901 - Nov-Mar 2013
winterprecip = winterprecip[,2:nlocs1] #first column is year
nlevel=11
coltab=rev(brewer.pal(nlevel-1,'RdBu'))
zr=range(winterprecip[N,])
break_a=round(seq(zr[1],zr[2],by=((zr[2]-zr[1])/(nlevel-1))))
quilt.plot(xlongs, xlats,winterprecip[N,] ,xlim=range(-125,-100),ylim=c(31.3,49.2),col=coltab,main = "2013 winter precipitation Western-US",zlim=zr,lab.breaks=break_a)
grid(col="gray70",lty=2)
US(add=TRUE, col="gray50", lwd=2,xlim=range(-125,-100))
box()
## filter the Arizona winter precipitation
ind=1:length(xlats)
ind1 = ind[xlats>=32 & xlats<=37 & xlongs >= -115 & xlongs <= -109]
xlon=xlongs[ind1]
xlat = xlats[ind1]
winter_prec_AZ=winterprecip[,ind1]
zr=range(winter_prec_AZ[N,])
break_a=round(seq(zr[1],zr[2],by=((zr[2]-zr[1])/(nlevel-1)))*10)/10
quilt.plot(xlon, xlat,winter_prec_AZ[N,] ,xlim=range(-115,-109),ylim=c(31.3,37.05),nx=30, ny=30,col=coltab,main = "2013 winter precipitation Arizona",zlim=zr,lab.breaks=break_a)
grid(col="gray70",lty=2)
US(add=TRUE, col="gray50", lwd=2,xlim=range(-125,-100))
box()
##PCA of Pacific SSTs
##########################################################
sstannavg2=sstannavg[1:N,] #SSTs Nov-Mar 1901 - Nov-Mar 2013
sstannavg1 = scale(sstannavg2)
zs=var(sstannavg1)
#do an Eigen decomposition..
zsvd=svd(zs)
ssteof = zsvd$u
#Principal Components...
pcs= sstannavg1 %*% zsvd$u
#Eigen Values.. - fraction variance
lambdas=(zsvd$d/sum(zsvd$d))
npc = 5 # number of PCs to use for CCA
sstPC = pcs[,1:npc]
sstpcfull = pcs
precMean = apply(winter_prec_AZ, 2, mean)
precSd = apply(winter_prec_AZ, 2, sd)
precip=scale(winter_prec_AZ)
zs=var(precip)
#do an Eigen decomposition..
zsvd=svd(zs)
preceof = zsvd$u
#Principal Components...
pcs=precip %*% zsvd$u
precpcfull=pcs
precPC = pcs[,1:npc]
M=dim(sstPC)[2]
J=dim(precPC)[2]
J=min(M,J)
N = length(precPC[,1])
Qx1 = qr.Q(qr(sstPC))
Qy1 = qr.Q(qr(precPC))
T11 = qr.R(qr(sstPC))
T22 = qr.R(qr(precPC))
VV=t(Qx1) %*% Qy1
# BB = svd(VV)$v
# AA = svd(VV)$u
BB = solve(T22) %*% svd(VV)$v * sqrt(N-1)
wm1 = precPC %*% BB
AA = solve(T11) %*% svd(VV)$u * sqrt(N-1)
vm1 = sstPC %*% AA
sstannavg2=sstannavg[117:118,] # SST Nov-Mar 2017 - Nov-Mar 2018
#PCA of SSTs
sstpc1718 = sstannavg2 %*% ssteof
sstpc1718 = sstpc1718[,1:npc]
## predict the PCs
ypred1718=sstpc1718 %*% AA %*% betahat
### first npc PCs from the PC forecast above and the remaining PCs are set to
## their means - i.e., 0
precPCpred = cbind(ypred1718,matrix(rep(0,2),ncol=N1,nrow=2))
## back transform to get the winter precipitation field
precpred = precPCpred %*% t(E)
### rescale the winter precipitation
precpred=t(t(precpred)*precSd + precMean)
Comments
According to the R2 between the observed and predicted winter precipitation at each grid point, the model seems to have a better performance in the south of the state.
In the case of the maximum precipitation for the last two years, the maximum value of precipitation occurs in 2017 at the middle east of the state, and its value is 1.8 (I do not know the units of this value).
In both years maximum values are in the north east of the state.