5. Repeat 5 by swapping summer rainfall with summer global tropical SSTs and vice-versa and, show correlation maps.

#### Clear Memory and Set Options
rm(list=ls())
options(warn=-1)
save <- FALSE  # Change to TRUE if you want to save the plot

# Set working directory (Modify if necessary)
script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(script_dir)

library(maps)
library(akima)
library(fields)
## Loading required package: spam
## Spam version 2.11-1 (2025-01-20) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## Loading required package: viridisLite
## 
## Try help(fields) to get started.
library(RColorBrewer)

myPalette <- colorRampPalette((brewer.pal(9, "RdBu")), space="Lab") # Blue high, red low

## read SST data
nrows_sst <- 180
ncols_sst <- 17
ntime = 67   #Jun-Sep 1950 - 2016

nglobe_sst = nrows_sst * ncols_sst
N_sst = nrows_sst * ncols_sst


### Lat - Long grid..
ygrid_sst = seq(-16,16,by=2)
ny_sst =length(ygrid_sst)

xgrid_sst=seq(0,358,by=2)
nx_sst=length(xgrid_sst)

xygrid_sst=matrix(0,nrow=nx_sst * ny_sst,ncol=2)

i=0
for(iy in 1:ny_sst){
for(ix in 1:nx_sst){
i=i+1
xygrid_sst[i,1]=ygrid_sst[iy]
xygrid_sst[i,2]=xgrid_sst[ix]
}

}
data_sst=readBin("data/NOAA-Trop-JJAS-SST-1950-2016.r4",what="numeric", n=( nrows_sst * ncols_sst * ntime), size=4,endian="swap")

data_sst <- array(data_sst, dim=c( nrows_sst, ncols_sst, ntime ) )

data1_sst=data_sst[,,1]
    

# the lat -long data grid..
index_sst=1:(nx_sst*ny_sst)

## pull only data and the locations with non-missing data
index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"]   # only non-missing data.
xygrid1_sst=xygrid_sst[index1_sst,]

nsites_sst=length(index1_sst)

data2_sst=data1_sst[index1_sst]

### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=ntime, ncol=nsites_sst)


for(i in 1:ntime){
data1_sst=data_sst[,,i]
index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"]
data2_sst=data1_sst[index1_sst]
sstdata[i,]=data2_sst
}

sstannavg = sstdata
indexgrid_sst = index1_sst
rm("data_sst")  #remove the object data to clear up space


## write out the grid locations..
write(t(cbind(xygrid1_sst, indexgrid_sst)),file="data/5-NOAA-trop-sst-locs.txt",ncol=3)
(i)Perform a PCA
###################### PCA 
##

#get variance matrix..
## scale the data
sstscale = scale(sstannavg)
zs=var(sstscale)

#do an Eigen decomposition..
zsvd=svd(zs)

#Principal Components...
sstpcs=t(t(zsvd$u) %*% t(sstscale))

#Eigen Values.. - fraction variance 
lambdas=(zsvd$d/sum(zsvd$d))

plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. explained")
grid()

print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100)))
## [1] "The first 4 PCs of explain 73% of the variance"
print(sprintf("The first 6 PCs of explain %s%% of the variance", round(sum(lambdas[1:6])*100)))
## [1] "The first 6 PCs of explain 80% of the variance"
#plots..

xlong = sort(unique(xygrid_sst[,2]))
ylat = sort(unique(xygrid_sst[,1]))

# Plot first 4 spatial eigenvectors
for (i in 1:4) {
zfull = rep(NaN,nglobe_sst)   #
zfull[indexgrid_sst]=zsvd$u[,2]
zmat = matrix(zfull,nrow=nrows_sst,ncol=ncols_sst)

 image.plot(xlong,ylat,zmat,ylim=range(-16,16),col=myPalette(200))
contour(xlong,ylat,(zmat),ylim=range(-16,16),add=TRUE,nlev=6,lwd=2)

  maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2)
  title(paste("PCA Spatial Mode", i))
 grid()
}

# Plot first 4 temporal PCs
for (i in 1:4) {
plot(1950:2016, sstpcs[,i],type="b",xlab="Year",ylab=paste("PC", i))
grid()
}

## ENSO index
nino4=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO4/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch")
nino34=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO34/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch")

for (i in 1:4) {
  r_val <- round(cor(scale(sstpcs[,i]), nino34), 3)
  print(bquote(.(paste("Correlation between PC", i, "and ENSO index:")) ~ R^2 == .(round(r_val, 3))))
}
## "Correlation between PC 1 and ENSO index:" ~ R^2 == -0.468
## "Correlation between PC 2 and ENSO index:" ~ R^2 == 0.826
## "Correlation between PC 3 and ENSO index:" ~ R^2 == 0.008
## "Correlation between PC 4 and ENSO index:" ~ R^2 == -0.128
Note that cor(nino34, sstpcs[,2]) gives the highest correlation meaning the second mode is ENSO
## plot the PCs

for (i in 1:4) {
plot(1950:2016, scale(sstpcs[,i]),type="b",xlab="Year",ylab=paste("PC", i))
lines(1950:2016, nino34, col="red")
grid()
}

Now perform PCA on sstanrem

(ii)Rotate first six PCS
zrot = varimax(zsvd$u[,1:6],normalize=FALSE)
sstrotpcs=t(t(zrot$loadings) %*% t(sstannavg))

## plot the first 4 rotated spatial mode and temporal PCs
xlong = sort(unique(xygrid_sst[,2]))
ylat = sort(unique(xygrid_sst[,1]))
par(mfrow = c(2, 2))
par(mar = c(3, 4, 2, 1))

for(i in 1:4){
zfull = rep(NaN,nglobe_sst)   #also equal 72*36
zfull[indexgrid_sst]=zrot$loadings[,3]
zmat = matrix(zfull,nrow=nrows_sst,ncol=ncols_sst)

 image.plot(xlong,ylat,zmat,ylim=range(-16,16),col=myPalette(200))
 mtext(paste("Rotated EOF no.",i,sep=""), side = 3, line = 0.2, cex = 0.8)
 contour(xlong,ylat,(zmat),ylim=range(-16,16),add=TRUE,nlev=6,lwd=2)
 maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2)
 box()
 
 plot(1950:2016, scale(sstrotpcs[,i]),type="b",xlab="Year",ann=FALSE)
 mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8)

}

#### Plot rotated PCs and ENSO

for (i in 1:4) {
  r_val <- round(cor(scale(sstrotpcs[,i]), nino34), 3)
  print(bquote(.(paste("the correlation between rotated PC no.", i, "and ENSO index:")) ~ R^2 == .(round(r_val, 3))))
}
## "the correlation between rotated PC no. 1 and ENSO index:" ~ 
##     R^2 == -0.498
## "the correlation between rotated PC no. 2 and ENSO index:" ~ 
##     R^2 == 0.856
## "the correlation between rotated PC no. 3 and ENSO index:" ~ 
##     R^2 == 0.772
## "the correlation between rotated PC no. 4 and ENSO index:" ~ 
##     R^2 == -0.453
for (i in 1:4) {
plot(1950:2016, scale(sstrotpcs[,i]),type="b",xlab="Year",ann=FALSE)
mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8)
lines(1950:2016, nino34, col="red")
}

Comments
SST PCA

The first mode explains nearly 40% of the total variance,indicating a strong dominant pattern in SST variability.The second mode explains nearly 20% of the total variance.The first 4 PCs of explain 73% of the variance.

Rotated SST PCA

Both PCA and rotated PCA successfully extract the ENSO mode. Rotated PCA spreads ENSO variability over multiple components (esp. RPC2 and RPC3), which improves spatial localization but makes temporal interpretation slightly less direct.

Correlated PCs of SSTs with Rain field

nrows <- 68
ncols <- 65
ntime <- 67   #Jun-Sep 1950 - 2016

nglobe <- nrows * ncols
N = nrows * ncols


### Lat - Long grid..
ygrid=seq(6.5,38.5,by=0.5)
ny=length(ygrid)

xgrid=seq(66.5,100,by=0.5)
nx=length(xgrid)

xygrid=matrix(0,nrow=nx*ny,ncol=2)

i=0
for(iy in 1:ny){
for(ix in 1:nx){
i=i+1
xygrid[i,1]=ygrid[iy]
xygrid[i,2]=xgrid[ix]
}

}
data=readBin("data/India-Rain-JJAS-05deg-1950-2016.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")

data <- array(data = data, dim=c( nrows, ncols, ntime ) )

data1=data[,,1]
    

# the lat -long data grid..
index=1:(nx*ny)

## pull only data and the locations with non-missing data
index1=index[data1 != "NaN"]    # only non-missing data.
xygrid1=xygrid[index1,]

nsites=length(index1)

data2=data1[index1]


### Rain data matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
raindata=matrix(NA,nrow=ntime, ncol=nsites)


for(i in 1:ntime){
data1=data[,,i]
index1=index[ data1 != "NaN"]
data2=data1[index1]
raindata[i,]=data2
}

index = 1:dim(raindata)[2]

xx = apply(raindata,2,mean)
index2 = index1[xx > 0]
index3 = index[xx > 0]

xygrid1=xygrid[index2,]
rainavg = raindata[,index3]

indexgrid = index2
rm("data")  #remove the object data to clear up space


## write out the grid locations..
write(t(cbind(xygrid1,indexgrid)),file="data/5-India-rain-locs.txt",ncol=3)
rainlocs = read.table("data/5-India-rain-locs.txt")

xlong = xgrid
ylat = ygrid

for (i in 1:4) {
  xcor = cor(sstpcs[,i], rainavg)
  zfull = rep(NaN,nx*ny)   #
  zfull[rainlocs[,3]]=xcor
zmat = matrix(zfull,nrow=nx,ncol=ny)
 
  image.plot(xlong,ylat,zmat,ylim=range(5,40),col=myPalette(200),zlim=c(-0.5,0.5))
  contour(xlong,ylat,(zmat),ylim=range(5,40),add=TRUE,nlev=3,lwd=2)
  title(main = paste("Correlation of SST PC", i, "with rain field"))
  maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2)
 grid()
}