4. PCA - Field Correlations
You wish to identify the space-time patterns of variability of the winter (Dec - Mar) 3-day maximum precipitation and SSTs for the 1964 - 2018 period
(i) Perform a PCA on the winter global SST anomalies
- Plot the Eigen variance spectrum for the first 25 modes
- Plot the leading 4 spatial (Eigen vectors) and temporal (PCs) modes of the variability
(ii) Perform a rotated PCA (rotate the first 6 PCs) and plot the leading 4 spatial and temporal modes of variability. Compare the results with (i) above
(iv) Correlate the first four PCs of rainfall with the SSTs and vice-versa and show correlation maps
#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()
#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")
# Unload packages that create a conflict with map("world2", add = T)
detach(package:mclust)
detach(package:sm)
detach(package:kohonen)
nrows=72
ncols=36
ntime = 118 #Nov-Mar 1901 - Nov-Mar 2018
nyrs = 55 # Nov-Mar 1964 - Nov-Mar 2018
nglobe = nrows*ncols
N = nrows*ncols
### Lat - Long grid..
locs=matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/sst-lat-long.txt"), ncol=2, byrow=T)
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid)
xgrid=seq(27.5,382.5,by=5)
#xgrid[xgrid > 180]=xgrid[xgrid > 180]-360 #longitude on 0-360 grid if needed
xgrid[xgrid > 180]=xgrid[xgrid > 180]
nx=length(xgrid)
xygrid=matrix(0,nrow=nx*ny,ncol=2)
i=0
for(iy in 1:ny){
for(ix in 1:nx){
i=i+1
xygrid[i,1]=ygrid[iy]
xygrid[i,2]=xgrid[ix]
}
}
data=readBin(paste(mainDir, "data/prob4/data.r4", sep = ""),what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data = data[,,64:ntime]
data1=data[,,1]
# the lat -long data grid..
index=1:(nx*ny)
index1=index[data1 < 20 & data1 != "NaN"] # only non-missing data.
xygrid1=xygrid[index1,]
x1=xygrid1[,2]
#x1[x1 < 0]= x1[x1 < 0] + 360
#xygrid1[,2]=x1
nsites=length(index1)
data2=data1[index1]
### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=nyrs, ncol=nsites)
for(i in 1:nyrs){
data1=data[,,i]
index1=index[data1 < 20 & data1 != "NaN"]
data2=data1[index1]
sstdata[i,]=data2
}
sstannavg = sstdata
indexgrid = index1
rm("data") #remove the object data to clear up space
## write out the grid locations..
write(t(xygrid1),file="kaplan-sst-locs.txt",ncol=2)
#get variance matrix..
## scale the data
sstscale = scale(sstannavg)
zs=var(sstscale)
#do an Eigen decomposition..
zsvd=svd(zs)
#Principal Components...
pcs=t(t(zsvd$u) %*% t(sstscale))
#Eigen Values.. - fraction variance
lambdas=(zsvd$d/sum(zsvd$d))
plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. Explained", main = "Eigen Variance Spectrum")
grid()
print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100)))
## [1] "The first 4 PCs of explain 54% of the variance"
# the data is on a grid so fill the entire global grid with NaN and then populate the ocean grids with
# the Eigen vector
xlong = sort(unique(xygrid[,2]))
ylat = sort(unique(xygrid[,1]))
par(mfrow = c(4,2))
par(mar = c(2, 5, 2, 1))
for (i in 1:4) {
# Spatial (Eigenvector)
zfull = rep(NaN,nglobe) #also equal 72*36
zfull[indexgrid]=zsvd$u[,i]
zmat = matrix(zfull,nrow=nrows,ncol=ncols)
image.plot(xlong,ylat,zmat,ylim=range(-40,70))
mtext(paste("EOF", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
contour(xlong,ylat,(zmat),ylim=range(-40,70),add=TRUE,nlev=6,lwd=2)
map("world2",add=T)
# Temporal (PC)
plot(1964:2018, scale(pcs[,i]),type="b", axes = FALSE, ann = FALSE)
axis(2)
axis(1)
mtext(paste("PC", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
}
par(.pardefault)
## ENSO index
nino4=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO4/T/(Nov%201899)/(Mar%202018)/RANGE/T/(Nov-Mar)/seasonalAverage/data.ch")
nino4 = nino4[64:ntime]
## PC2 will be ENSO
plot(1964:2018, scale(pcs[,2]),type="b",xlab="Year",ylab="PC2")
lines(1964:2018, nino4, col="red")
print(bquote("Correlation between PC 2 and ENSO index: "~ R^2 == .(round(cor(scale(pcs[,2]),nino4)*1000)/1000)))
## "Correlation between PC 2 and ENSO index: " ~ R^2 == 0.104
## Note that cor(nino4, pcs[,2]) gives the highest correlation
##meaning the second mode is ENSO
## Rotate the first 6 PCs
zrot = varimax(zsvd$u[,1:6],normalize=FALSE)
## plot the first 4 rotated spatial mode
xlong = sort(unique(xygrid[,2]))
ylat = sort(unique(xygrid[,1]))
rotpcs=t(t(zrot$loadings) %*% t(sstannavg))
par(mfrow = c(4,2))
par(mar = c(2, 5, 2, 1))
for (i in 1:4) {
# Spatial (Eigenvector)
zfull = rep(NaN,nglobe) #also equal 72*36
zfull[indexgrid]=zrot$loadings[,i]
zmat = matrix(zfull,nrow=nrows,ncol=ncols)
image.plot(xlong,ylat,zmat,ylim=range(-40,70))
mtext(paste("Rotated EOF", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
contour(xlong,ylat,(zmat),ylim=range(-40,70),add=TRUE,nlev=6,lwd=2)
map("world2",add=T)
# Temporal (PC)
plot(1964:2018, scale(rotpcs[,i]),type="b", axes = FALSE, ann = FALSE)
axis(2)
axis(1)
mtext(paste("Rotated PC", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
}
par(.pardefault)
## PC2 will be ENSO
plot(1964:2018, scale(rotpcs[,2]),type="b",xlab="Year",ylab="PC2")
lines(1964:2018, nino4, col="red")
print(bquote("Correlation between Rotated PC 2 and ENSO index: "~ R^2 == .(round(cor(scale(rotpcs[,2]),nino4)*1000)/1000)))
## "Correlation between Rotated PC 2 and ENSO index: " ~ R^2 ==
## 0.07
SST PCA
- The first 4 PCs of winter SSTs explain the majority (54%) of the variance
- PC 1 indicates a negative temporal trend after ~2000 that seems to correspond to cooling between -20\(^\circ\) and +20\(^\circ\) latitude and heating north of 40\(^\circ\) and south of -40\(^\circ\)
- PC 2 is not strongly correlated with the ENSO index for 1964 - 2018
Rotated SST PCA
- Rotated PC 1 indicates a consistent negative temporal trend after ~1985 that corresponds to cooling in the Pacific between -20\(^\circ\) and +20\(^\circ\) latitude and a stronger heating pattern everywhere else as compared to the unrotated EOF 1
- PC 2 is not correlated to the ENSO index (R2 < 0.1)
## Read winter precipitation data
# Load winter precipitation data
## modify data input so that data is in matrix form
data = read.table(paste(mainDir, "data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
loc = read.table(paste(mainDir, "data/Precipitaton_data.txt", sep = ""), header = T)
lon = loc[,1]
lat = loc[,2]
years = data[,1]
precip = data[,2:ncol(data)]
## Perform PCA
# Get Variance
#get variance matrix..
## scale the data
precip_scale = scale(precip)
zs=var(precip_scale)
#do an Eigen decomposition..
zsvd=svd(zs)
#Principal Components...
precip_pcs = t(t(zsvd$u) %*% t(precip_scale))
#Eigen Values.. - fraction variance
lambdas=(zsvd$d/sum(zsvd$d))
plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. Explained", main = "Eigen Variance Spectrum")
grid()
print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100)))
## [1] "The first 4 PCs of explain 44% of the variance"
print(sprintf("The first 6 PCs of explain %s%% of the variance", round(sum(lambdas[1:6])*100)))
## [1] "The first 6 PCs of explain 54% of the variance"
## Plot winter precipitation eigenvectors (spatial) and PCs (temporal)
par(mfrow = c(4,2))
par(mar = c(2, 5, 2, 1))
for (i in 1:4) {
# Spatial (Eigenvector)
quilt.plot(lon, lat, zsvd$u[,i], xlim = range(-115,-101.3), ylim = range(29.5,42.5), ann = F)
mtext(paste("EOF", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
grid(col = "gray70", lty = 2)
US(add = T, col = "gray40", lwd = 1, xlim = range(-115,-101.3), ylim = range(29.5,42.5))
# Temporal (PC)
plot(1964:2018, scale(precip_pcs[,i]),type="b", axes = FALSE, ann = FALSE)
axis(2)
axis(1)
mtext(paste("PC", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
}
par(.pardefault)
## Correlate the first four PCs of winter SSTs with that of precipitation
cor_pc = cor(precip_pcs[,1:4],pcs[,1:4]) # Correlation between PCs
par(mfrow = c(4, 4))
par(oma=c(2,0,4,0)) # save some room for the legend
par(mar = c(4, 4, 2, 1))
for (i in 1:4) {
for (j in 1:4) {
if(i==1 & j==1){
plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab=as.character(i),xlim=c(-20,20),ylim=c(-10,10),main=as.character(j),axes=FALSE,font.lab=2)
axis(2)
axis(1)
}else if(i==1){
plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab="",xlim=c(-20,20),ylim=c(-10,10),main=as.character(j),axes=FALSE,font.lab=2 )
axis(2)
axis(1)
}else if(j==1){
plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab=as.character(i),xlim=c(-20,20),ylim=c(-10,10),axes=FALSE,font.lab=2 )
axis(2)
axis(1)
}
else {
plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab="",xlim=c(-20,20),ylim=c(-10,10),axes=FALSE,font.lab=2 )
axis(2)
axis(1)
}}}
mtext("Correlation between Winter Global SST Anomalies and Southwest Winter Precipitation", outer = TRUE, side = 3, cex = 0.75, line = 1)