4. PCA - Field Correlations
You wish to identify the space-time patterns of variability of the winter (Dec - Mar) 3-day maximum precipitation and SSTs for the 1964 - 2018 period
(i) Perform a PCA on the winter global SST anomalies
- Plot the Eigen variance spectrum for the first 25 modes
- Plot the leading 4 spatial (Eigen vectors) and temporal (PCs) modes of the variability
(ii) Perform a rotated PCA (rotate the first 6 PCs) and plot the leading 4 spatial and temporal modes of variability. Compare the results with (i) above
(iv) Correlate the first four PCs of rainfall with the SSTs and vice-versa and show correlation maps

#### CLEAR MEMORY
rm(list=ls())
#### Prevent Warnings
options(warn=-1)
#### Initialize Graphics
graphics.off()
.pardefault <- par()

#### Source Libraries and set working directory
mainDir="/Users/annastar/OneDrive - UCB-O365/CVEN 6833 - Advanced Data Analysis Techniques/Homework/HW 02/"
setwd(mainDir)
suppressPackageStartupMessages(source("hw2_library.R"))
source("hw2_library.R")
# Unload packages that create a conflict with map("world2", add = T)
detach(package:mclust)
detach(package:sm)
detach(package:kohonen)

nrows=72
ncols=36
ntime = 118    #Nov-Mar 1901 - Nov-Mar 2018
nyrs = 55       # Nov-Mar 1964 - Nov-Mar 2018

nglobe = nrows*ncols
N = nrows*ncols

### Lat - Long grid..
locs=matrix(scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/sst-lat-long.txt"), ncol=2, byrow=T)
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid)

xgrid=seq(27.5,382.5,by=5)
#xgrid[xgrid > 180]=xgrid[xgrid > 180]-360  #longitude on 0-360 grid if needed
xgrid[xgrid > 180]=xgrid[xgrid > 180]
nx=length(xgrid)

xygrid=matrix(0,nrow=nx*ny,ncol=2)

i=0
for(iy in 1:ny){
  for(ix in 1:nx){
    i=i+1
    xygrid[i,1]=ygrid[iy]
    xygrid[i,2]=xgrid[ix]
  }
  
}

Read Kaplan SST data

data=readBin(paste(mainDir, "data/prob4/data.r4", sep = ""),what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data = data[,,64:ntime]

data1=data[,,1]

# the lat -long data grid..

index=1:(nx*ny)

index1=index[data1 < 20 & data1 != "NaN"]   # only non-missing data.
xygrid1=xygrid[index1,]

x1=xygrid1[,2]
#x1[x1 < 0]= x1[x1 < 0] + 360
#xygrid1[,2]=x1

nsites=length(index1)
data2=data1[index1]

### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=nyrs, ncol=nsites)

for(i in 1:nyrs){
  data1=data[,,i]
  index1=index[data1 < 20 & data1 != "NaN"]
  data2=data1[index1]
  sstdata[i,]=data2
}

sstannavg = sstdata
indexgrid = index1
rm("data")  #remove the object data to clear up space

## write out the grid locations..
write(t(xygrid1),file="kaplan-sst-locs.txt",ncol=2)

(i) Perform a PCA on the winter global SST anomalies

#get variance matrix..
## scale the data
sstscale = scale(sstannavg)
zs=var(sstscale)

#do an Eigen decomposition..
zsvd=svd(zs)

#Principal Components...
pcs=t(t(zsvd$u) %*% t(sstscale))

#Eigen Values.. - fraction variance 
lambdas=(zsvd$d/sum(zsvd$d))

plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. Explained", main = "Eigen Variance Spectrum")
grid()

print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100)))
## [1] "The first 4 PCs of explain 54% of the variance"
# the data is on a grid so fill the entire global grid with NaN and then populate the ocean grids with
# the Eigen vector
xlong = sort(unique(xygrid[,2]))
ylat = sort(unique(xygrid[,1]))

par(mfrow = c(4,2))
par(mar = c(2, 5, 2, 1))
for (i in 1:4) {
  # Spatial (Eigenvector)
  zfull = rep(NaN,nglobe)   #also equal 72*36
  zfull[indexgrid]=zsvd$u[,i]
  zmat = matrix(zfull,nrow=nrows,ncol=ncols)
  image.plot(xlong,ylat,zmat,ylim=range(-40,70))
  mtext(paste("EOF", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
  contour(xlong,ylat,(zmat),ylim=range(-40,70),add=TRUE,nlev=6,lwd=2)
  map("world2",add=T)
  
  # Temporal (PC)
  plot(1964:2018, scale(pcs[,i]),type="b", axes = FALSE, ann = FALSE)
  axis(2)
  axis(1)
  mtext(paste("PC", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
}

par(.pardefault)

## ENSO index
nino4=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO4/T/(Nov%201899)/(Mar%202018)/RANGE/T/(Nov-Mar)/seasonalAverage/data.ch")
nino4 = nino4[64:ntime]

## PC2 will be ENSO
plot(1964:2018, scale(pcs[,2]),type="b",xlab="Year",ylab="PC2")
lines(1964:2018, nino4, col="red")

print(bquote("Correlation between PC 2 and ENSO index: "~ R^2 == .(round(cor(scale(pcs[,2]),nino4)*1000)/1000)))
## "Correlation between PC 2 and ENSO index: " ~ R^2 == 0.104
## Note that cor(nino4, pcs[,2]) gives the highest correlation
##meaning the second mode is ENSO

(ii) Perform a rotated PCA (rotate the first 6 PCs) and plot the leading 4 spatial and temporal modes of variability.

## Rotate the first 6 PCs
zrot = varimax(zsvd$u[,1:6],normalize=FALSE)

## plot the first 4 rotated spatial mode
xlong = sort(unique(xygrid[,2]))
ylat = sort(unique(xygrid[,1]))
rotpcs=t(t(zrot$loadings) %*% t(sstannavg))

par(mfrow = c(4,2))
par(mar = c(2, 5, 2, 1))
for (i in 1:4) {
  # Spatial (Eigenvector)
  zfull = rep(NaN,nglobe)   #also equal 72*36
  zfull[indexgrid]=zrot$loadings[,i]
  zmat = matrix(zfull,nrow=nrows,ncol=ncols)
  image.plot(xlong,ylat,zmat,ylim=range(-40,70))
  mtext(paste("Rotated EOF", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
  contour(xlong,ylat,(zmat),ylim=range(-40,70),add=TRUE,nlev=6,lwd=2)
  map("world2",add=T)
  
  # Temporal (PC)
  plot(1964:2018, scale(rotpcs[,i]),type="b", axes = FALSE, ann = FALSE)
  axis(2)
  axis(1)
  mtext(paste("Rotated PC", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
}

par(.pardefault)

## PC2 will be ENSO
plot(1964:2018, scale(rotpcs[,2]),type="b",xlab="Year",ylab="PC2")
lines(1964:2018, nino4, col="red")

print(bquote("Correlation between Rotated PC 2 and ENSO index: "~ R^2 == .(round(cor(scale(rotpcs[,2]),nino4)*1000)/1000)))
## "Correlation between Rotated PC 2 and ENSO index: " ~ R^2 == 
##     0.07

Compare the results with (i) above

SST PCA
- The first 4 PCs of winter SSTs explain the majority (54%) of the variance
- PC 1 indicates a negative temporal trend after ~2000 that seems to correspond to cooling between -20\(^\circ\) and +20\(^\circ\) latitude and heating north of 40\(^\circ\) and south of -40\(^\circ\)
- PC 2 is not strongly correlated with the ENSO index for 1964 - 2018
Rotated SST PCA
- Rotated PC 1 indicates a consistent negative temporal trend after ~1985 that corresponds to cooling in the Pacific between -20\(^\circ\) and +20\(^\circ\) latitude and a stronger heating pattern everywhere else as compared to the unrotated EOF 1
- PC 2 is not correlated to the ENSO index (R2 < 0.1)

(iv) Correlate the first four PCs of rainfall with the SSTs and vice-versa and show correlation maps

## Read winter precipitation data
# Load winter precipitation data
## modify data input so that data is in matrix form
data = read.table(paste(mainDir, "data/Winter_temporal/Max_Winter_Seas_Prec.txt", sep = ""), header = T)
loc = read.table(paste(mainDir, "data/Precipitaton_data.txt", sep = ""), header = T)
lon = loc[,1]
lat = loc[,2]

years = data[,1]
precip = data[,2:ncol(data)]

## Perform PCA
# Get Variance
#get variance matrix..
## scale the data
precip_scale = scale(precip)
zs=var(precip_scale)

#do an Eigen decomposition..
zsvd=svd(zs)

#Principal Components...
precip_pcs = t(t(zsvd$u) %*% t(precip_scale))

#Eigen Values.. - fraction variance 
lambdas=(zsvd$d/sum(zsvd$d))

plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. Explained", main = "Eigen Variance Spectrum")
grid()

print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100)))
## [1] "The first 4 PCs of explain 44% of the variance"
print(sprintf("The first 6 PCs of explain %s%% of the variance", round(sum(lambdas[1:6])*100)))
## [1] "The first 6 PCs of explain 54% of the variance"
## Plot winter precipitation eigenvectors (spatial) and PCs (temporal)
par(mfrow = c(4,2))
par(mar = c(2, 5, 2, 1))
for (i in 1:4) {
  # Spatial (Eigenvector)
  quilt.plot(lon, lat, zsvd$u[,i], xlim = range(-115,-101.3), ylim = range(29.5,42.5), ann = F)
  mtext(paste("EOF", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
  grid(col = "gray70", lty = 2)
  US(add = T, col = "gray40", lwd = 1, xlim = range(-115,-101.3), ylim = range(29.5,42.5))
  
  # Temporal (PC)
  plot(1964:2018, scale(precip_pcs[,i]),type="b", axes = FALSE, ann = FALSE)
  axis(2)
  axis(1)
  mtext(paste("PC", i, sep = " "), side = 3, line = 0.2, cex = 0.8)
}

par(.pardefault)

## Correlate the first four PCs of winter SSTs with that of precipitation
cor_pc = cor(precip_pcs[,1:4],pcs[,1:4])    # Correlation between PCs
par(mfrow = c(4, 4))
par(oma=c(2,0,4,0)) # save some room for the legend
par(mar = c(4, 4, 2, 1))
for (i in 1:4) {
  for (j in 1:4) {
    if(i==1 & j==1){
      plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab=as.character(i),xlim=c(-20,20),ylim=c(-10,10),main=as.character(j),axes=FALSE,font.lab=2)
      axis(2)
      axis(1)
    }else if(i==1){
      plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab="",xlim=c(-20,20),ylim=c(-10,10),main=as.character(j),axes=FALSE,font.lab=2 )
      axis(2)
      axis(1)
    }else if(j==1){
      plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab=as.character(i),xlim=c(-20,20),ylim=c(-10,10),axes=FALSE,font.lab=2 )
      axis(2)
      axis(1)
    }
    else {
      plot(pcs[,j],precip_pcs[,i],type="p",xlab=paste("R2=",round(cor_pc[i,j]*1000)/1000,sep = ""),ylab="",xlim=c(-20,20),ylim=c(-10,10),axes=FALSE,font.lab=2 )
      axis(2)
      axis(1)
    }}}
mtext("Correlation between Winter Global SST Anomalies and Southwest Winter Precipitation", outer = TRUE, side = 3, cex = 0.75, line = 1)

Discussion