10. Joint Spatial Analysis & Field Forecasting using CCA
One of the objectives of multivariate analysis is to enable multivariate forecasting.
Here we wish to predict the winter 3-day maximum precipitation over Southwest U.S. as a function of winter SSTs. We use CCA for this and the simplified steps are described below.
(i) Take the leading ~4 PCs of the winter SSTs (which you can obtain following the procedure in problem 4) and perform CCA with the leading ~4 PCs of precipitation.
(ii) Fit a regression for each canonical variate - canonical variate of precipitation related to canonical variate of SSTs. Use these regressions to predict the flow variates and back transform them to the original space - i.e., the precipitation space.
(iii) Evaluate the performance by computing R2 between the observed and predicted summer precipitation at each grid point.
(iv) Do a leave-one-out cross validation Compare with results from problem 6.
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")
# Unload packages that create a conflict with map("world2", add = T)
detach(package:mclust)
detach(package:sm)
detach(package:kohonen)
nrows=72
ncols=36
ntime = 118 #Nov-Mar 1901 - Nov-Mar 2018
nyrs = 55 # Nov-Mar 1964 - Nov-Mar 2018
nglobe = nrows*ncols
N = nrows*ncols
### Lat - Long grid..
locs=matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/sst-lat-long.txt"), ncol=2, byrow=T)
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid)
xgrid=seq(27.5,382.5,by=5)
#xgrid[xgrid > 180]=xgrid[xgrid > 180]-360 #longitude on 0-360 grid if needed
xgrid[xgrid > 180]=xgrid[xgrid > 180]
nx=length(xgrid)
xygrid=matrix(0,nrow=nx*ny,ncol=2)
i=0
for(iy in 1:ny){
for(ix in 1:nx){
i=i+1
xygrid[i,1]=ygrid[iy]
xygrid[i,2]=xgrid[ix]
}
}
data=readBin(paste(mainDir, "data/prob4/data.r4", sep = ""),what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data = data[,,64:ntime]
data1=data[,,1]
# the lat -long data grid..
index=1:(nx*ny)
index1=index[data1 < 20 & data1 != "NaN"] # only non-missing data.
xygrid1=xygrid[index1,]
x1=xygrid1[,2]
#x1[x1 < 0]= x1[x1 < 0] + 360
#xygrid1[,2]=x1
nsites=length(index1)
data2=data1[index1]
### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=nyrs, ncol=nsites)
for(i in 1:nyrs){
data1=data[,,i]
index1=index[data1 < 20 & data1 != "NaN"]
data2=data1[index1]
sstdata[i,]=data2
}
sstannavg = sstdata
indexgrid = index1
rm("data") #remove the object data to clear up space
## write out the grid locations..
write(t(xygrid1),file="kaplan-sst-locs.txt",ncol=2)
#get variance matrix..
## scale the data
sstscale = scale(sstannavg)
zs=var(sstscale)
#do an Eigen decomposition..
zsvd=svd(zs)
#Principal Components...
pcs=t(t(zsvd$u) %*% t(sstscale))
#Eigen Values.. - fraction variance
lambdas=(zsvd$d/sum(zsvd$d))
npc = 4 # Take leading 4 PCs
sst_pcs = pcs[,1:npc]
sst_full = pcs
data = read.table(paste(mainDir, "data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
loc = read.table(paste(mainDir, "data/Precipitaton_data.txt", sep = ""), header = T)
lon = loc[,1]
lat = loc[,2]
precip = data[,2:ncol(data)]
precMean = apply(precip, 2, mean)
precSd = apply(precip, 2, sd)
## Perform PCA
# Get Variance
#get variance matrix..
## scale the data
precip_scale = scale(precip)
zs=var(precip_scale)
#do an Eigen decomposition..
zsvd=svd(zs)
#Principal Components...
pcs = t(t(zsvd$u) %*% t(precip_scale))
prec_eof = zsvd$u
prec_pcs = pcs[,1:npc]
prec_full = pcs
M=dim(sst_pcs)[2]
J=dim(prec_pcs)[2]
J=min(M,J)
N = length(prec_pcs[,1])
Qx1 = qr.Q(qr(sst_pcs))
Qy1 = qr.Q(qr(prec_pcs))
T11 = qr.R(qr(sst_pcs))
T22 = qr.R(qr(prec_pcs))
VV=t(Qx1) %*% Qy1
BB = solve(T22) %*% svd(VV)$v * sqrt(N-1)
wm1 = prec_pcs %*% BB
AA = solve(T11) %*% svd(VV)$u * sqrt(N-1)
vm1 = sst_pcs %*% AA
## correlate the forecasted rainfall with the historical winter precipitation
preccor = diag(cor(prec_pred,precip))
# plot of R^2 between the observed and predicted winter precipitation at each grid point
nlevel = 11
coltab=rev(brewer.pal(nlevel-1,'RdBu'))
zr=range(preccor)
break_a=round(seq(zr[1],zr[2],by=((zr[2]-zr[1])/(nlevel-1)))*100)/100
quilt.plot(lon, lat,preccor, xlim = range(-115,-101.3), ylim = range(29.5,42.5), nx=30, ny=30,col=coltab,zlim=zr,lab.breaks=break_a)
mtext(bquote(~ R^2 ~ "between the Observed and Predicted Southwest U.S. Winter Precipitation"), side = 3, line = 0.2, cex = 0.8)
grid(col = "gray70", lty = 2)
US(add = T, col = "gray40", lwd = 1, xlim = range(-115,-101.3), ylim = range(29.5,42.5))
box()
prec_loocv = matrix(0,nrow=nrow(precip_scale),ncol=ncol(precip_scale))
## If you wish to remove a component from the data, say first component.
for (i in 1:nrow(precip_scale)) {
for (j in 1:ncol(precip_scale)) {
prec_rem = precip_scale
prec_rem[i,j] = 0
# Now perform PCA on prec_rem
zs=var(prec_rem)
# do an Eigen decomposition..
zsvd=svd(zs)
# Principal Components...
pcs = t(t(zsvd$u) %*% t(prec_rem))
prec_eof = zsvd$u
prec_pcs = pcs[,1:npc]
prec_full = pcs
# Take the leading ~4 PCs of the winter SSTs and perform CCA with the leading ~4 PCs of precipitation
M=dim(sst_pcs)[2]
J=dim(prec_pcs)[2]
J=min(M,J)
N = length(prec_pcs[,1])
Qx1 = qr.Q(qr(sst_pcs))
Qy1 = qr.Q(qr(prec_pcs))
T11 = qr.R(qr(sst_pcs))
T22 = qr.R(qr(prec_pcs))
VV=t(Qx1) %*% Qy1
BB = solve(T22) %*% svd(VV)$v * sqrt(N-1)
wm1 = prec_pcs %*% BB
AA = solve(T11) %*% svd(VV)$u * sqrt(N-1)
vm1 = sst_pcs %*% AA
# Fit a regression for each canonical variate - canonical variate of precipitation related to canonical variate of SSTs
# Predict the winter precipitation PCS
betahat = solve(t(AA) %*% t(sst_pcs)%*% sst_pcs %*% AA) %*% t(AA) %*% t(sst_pcs) %*% prec_pcs
ypred = sst_pcs %*% AA %*% betahat
# first npc PCs from the PC forecast above and the remaining PCs are set to
# their means - i.e., 0
N1 = dim(prec_rem)[2]-(npc)
prec_pcs_pred = cbind(ypred,matrix(rep(0,N),ncol=N1,nrow=N))
# Keep only the first npc Eigen Vectors and set rest to zero
E = matrix(0,nrow=dim(prec_rem)[2],ncol=dim(prec_rem)[2])
E[,1:npc]=prec_eof[,1:npc]
# back transform to get the winter precipitation field
prec_pred = prec_pcs_pred %*% t(E)
# rescale the winter precipitation
prec_pred = t(t(prec_pred)*precSd + precMean)
# Replace missing data
prec_loocv[i,j] = prec_pred[i,j]
}
}
## correlate the forecasted rainfall with the historical winter precipitation
loocvcor = diag(cor(prec_loocv,precip))
# plot of R^2 between the observed and predicted winter precipitation at each grid point
nlevel = 11
coltab=rev(brewer.pal(nlevel-1,'RdBu'))
zr=range(loocvcor)
break_a=round(seq(zr[1],zr[2],by=((zr[2]-zr[1])/(nlevel-1)))*100)/100
quilt.plot(lon, lat, loocvcor, xlim = range(-115,-101.3), ylim = range(29.5,42.5), nx=30, ny=30,col=coltab,zlim=zr,lab.breaks=break_a)
mtext(bquote(~ R^2 ~ "between the Observed and Predicted Southwest U.S. Winter Precipitation"), side = 3, line = 0.2, cex = 0.8)
grid(col = "gray70", lty = 2)
US(add = T, col = "gray40", lwd = 1, xlim = range(-115,-101.3), ylim = range(29.5,42.5))
box()