4. Summer season (June-September total) rainfall over India exhibits
significant spatial and temporal variability. You wish to identify the
space-time patterns of variability of. To this end,
- Perform a PCA on the summer season rainfall (cm) on a 0.5o x 0.5o
grid (i.e. the ‘Rajeevan grid’) over India is available for the period
1901 to 2016. Perform the analysis for the 1950 ~ 2016 period –Plot the
Eigen variance spectrum for the first 25 modes –Plot the leading 4
spatial (Eigen vectors) and temporal (PCs) modes of variability
- Perform a rotated PCA (rotate the first 6 PCs) and plot the leading
4 spatial and temporal modes of variability. Compare the results with
(i) above.
- Correlate the first four PCs (uncorrelated) of rainfall with summer
global tropical Sea Surface Temperatures (SSTs) and show correlation
maps.
# Load libraries
library(maps)
library(akima)
## Warning: package 'akima' was built under R version 4.4.3
library(fields)
## Loading required package: spam
## Spam version 2.11-1 (2025-01-20) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
## Loading required package: viridisLite
##
## Try help(fields) to get started.
library(RColorBrewer)
#### Clear Memory and Set Options
rm(list=ls())
options(warn=-1)
save <- FALSE # Change to TRUE if you want to save the plot
# Set working directory (Modify if necessary)
script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(script_dir)
# Color palette
myPalette <- colorRampPalette(rev(brewer.pal(9, "RdBu")), space="Lab")
##0.25 deg grid
#nrows <- 135
#ncols <- 129
### 0.5 deg grid
nrows=68
ncols=65
ntime <- 67 #Jun-Sep 1950 - 2016
nyrs <- ntime
nglobe <- nrows*ncols
N <- nrows*ncols
### Lat - Long grid..
#ygrid=seq(6.5,38.5,by=0.25)
ygrid=seq(6.5,38.5,by=0.5)
ny=length(ygrid)
#xgrid=seq(66.5,100,by=0.25)
xgrid=seq(66.5,100,by=0.5)
nx=length(xgrid)
xygrid=matrix(0,nrow=nx*ny,ncol=2)
i=0
for(iy in 1:ny){
for(ix in 1:nx){
i=i+1
xygrid[i,1]=ygrid[iy]
xygrid[i,2]=xgrid[ix]
}
}
# Read India Gridded Rainfall data..
data=readBin("data/India-Rain-JJAS-05deg-1950-2016.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
#data=readBin("data.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")
data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data1=data[,,1]
# the lat -long data grid..
index=1:(nx*ny)
## pull only data and the locations with non-missing data
index1=index[data1 != "NaN"] # only non-missing data.
xygrid1=xygrid[index1,]
nsites=length(index1)
data2=data1[index1]
### Rain data matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
raindata=matrix(NA,nrow=nyrs, ncol=nsites)
for(i in 1:nyrs){
data1=data[,,i]
index1=index[ data1 != "NaN"]
data2=data1[index1]
raindata[i,]=data2
}
index = 1:dim(raindata)[2]
xx = apply(raindata,2,mean)
index2 = index1[xx > 0]
index3 = index[xx > 0]
xygrid1=xygrid[index2,]
rainavg = raindata[,index3]
indexgrid = index2
rm("data") #remove the object data to clear up space
## write out the grid locations..
write(t(cbind(xygrid1,indexgrid)),file="data/4-India-rain-locs.txt",ncol=3)
(i)Perform a PCA
###################### PCA
##
# Standardize and perform PCA
rainscale = scale(rainavg)
zs=var(rainscale)
#do an Eigen decomposition..
zsvd=svd(zs)
#Principal Components...
rainpcs=t(t(zsvd$u) %*% t(rainscale))
#Eigen Values.. - fraction variance
lambdas=(zsvd$d/sum(zsvd$d))
plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. explained") #first 25 models
grid()

print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100)))
## [1] "The first 4 PCs of explain 34% of the variance"
print(sprintf("The first 6 PCs of explain %s%% of the variance", round(sum(lambdas[1:6])*100)))
## [1] "The first 6 PCs of explain 42% of the variance"
#plots..
# the data is on a grid so fill the entire global grid with NaN and then populate the ocean grids with the Eigen vector
xlong = sort(unique(xygrid[,2]))
ylat = sort(unique(xygrid[,1]))
# Plot first 4 spatial eigenvectors
for (k in 1:4) {
zfull = rep(NaN, nglobe)
zfull[indexgrid] = zsvd$u[,k]
zmat = matrix(zfull, nrow=nrows, ncol=ncols)
image.plot(xlong, ylat, zmat, ylim=range(5,40), col=myPalette(200))
contour(xlong, ylat, zmat, ylim=range(5,40), add=TRUE, nlev=3, lwd=2)
maps::map('world', wrap=c(0,360), add=TRUE, resolution=0, lwd=2)
title(paste("PCA Spatial Mode", k))
grid()
}




# Plot first 4 temporal PCs
for (k in 1:4) {
plot(1950:2016, rainpcs[,k], type="b", xlab="Year", ylab=paste("PC", k))
grid()
}




## ENSO index
nino4=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO4/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch")
nino34=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO34/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch")
for (k in 1:4) {
r_val <- round(cor(scale(rainpcs[,k]), nino34), 3)
print(bquote(.(paste("Correlation between PC", k, "and ENSO index:")) ~ R^2 == .(round(r_val, 3))))
}
## "Correlation between PC 1 and ENSO index:" ~ R^2 == 0.475
## "Correlation between PC 2 and ENSO index:" ~ R^2 == -0.037
## "Correlation between PC 3 and ENSO index:" ~ R^2 == -0.425
## "Correlation between PC 4 and ENSO index:" ~ R^2 == -0.081
Note that cor(nino34, pcs[,1]) gives the highest correlation meaning
the first mode is ENSO
## plot the PCs
## PC1 will be linked to ENSO
for (k in 1:4) {
plot(1950:2016, scale(rainpcs[,k]),type="b",xlab="Year",ylab=paste("PC", k))
lines(1950:2016, nino34, col="red")
grid()
}




(ii)Rotate first six PCS
zrot = varimax(zsvd$u[,1:6],normalize=FALSE)
rainrotpcs=t(t(zrot$loadings) %*% t(rainavg))
## plot the first 4 rotated spatial mode
xlong = sort(unique(xygrid[,2]))
ylat = sort(unique(xygrid[,1]))
par(mfrow = c(2, 2))
par(mar = c(3, 4, 2, 1))
for(i in 1:4){
zfull = rep(NaN,nglobe)
zfull[indexgrid]=zrot$loadings[,i]
zmat = matrix(zfull,nrow=nrows,ncol=ncols)
image.plot(xlong,ylat,zmat,ylim=range(5,40),col=myPalette(200))
mtext(paste("Rotated EOF no.",i,sep=""), side = 3, line = 0.2, cex = 0.8)
contour(xlong,ylat,(zmat),ylim=range(5,40),add=TRUE,nlev=3,lwd=2)
maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2)
box()
plot(1950:2016, scale(rainrotpcs[,i]),type="b",xlab="Year",ann=FALSE)
mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8)
}


Plot rotated PCs and ENSO
for (i in 1:4) {
r_val <- round(cor(scale(rainrotpcs[,i]), nino34), 3)
print(bquote(.(paste("the correlation between rotated PC no.", k, "and ENSO index:")) ~ R^2 == .(round(r_val, 3))))
}
## "the correlation between rotated PC no. 4 and ENSO index:" ~
## R^2 == 0.247
## "the correlation between rotated PC no. 4 and ENSO index:" ~
## R^2 == -0.415
## "the correlation between rotated PC no. 4 and ENSO index:" ~
## R^2 == -0.515
## "the correlation between rotated PC no. 4 and ENSO index:" ~
## R^2 == 0.118
for (i in 1:4) {
plot(1950:2016, scale(rainrotpcs[,i]),type="b",xlab="Year",ann=FALSE)
mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8)
lines(1950:2016, nino34, col="red")
}




(iii)Correlated PCs with SST fields
nrows_sst <- 180
ncols_sst <- 17
ntime = 67 #Jun-Sep 1950 - 2016
nglobe_sst = nrows_sst * ncols_sst
N_sst = nrows_sst * ncols_sst
### Lat - Long grid..
ygrid_sst = seq(-16,16,by=2)
ny_sst =length(ygrid_sst)
xgrid_sst=seq(0,358,by=2)
nx_sst=length(xgrid_sst)
xygrid_sst=matrix(0,nrow=nx_sst * ny_sst,ncol=2)
i=0
for(iy in 1:ny_sst){
for(ix in 1:nx_sst){
i=i+1
xygrid_sst[i,1]=ygrid_sst[iy]
xygrid_sst[i,2]=xgrid_sst[ix]
}
}
data_sst=readBin("data/NOAA-Trop-JJAS-SST-1950-2016.r4",what="numeric", n=( nrows_sst * ncols_sst * ntime), size=4,endian="swap")
data_sst <- array(data_sst, dim=c( nrows_sst, ncols_sst, ntime ) )
data1_sst=data_sst[,,1]
# the lat -long data grid..
index_sst=1:(nx_sst*ny_sst)
## pull only data and the locations with non-missing data
index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"] # only non-missing data.
xygrid1_sst=xygrid_sst[index1_sst,]
nsites_sst=length(index1_sst)
data2_sst=data1_sst[index1_sst]
### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstdata=matrix(NA,nrow=nyrs, ncol=nsites_sst)
for(i in 1:nyrs){
data1_sst=data_sst[,,i]
index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"]
data2_sst=data1_sst[index1_sst]
sstdata[i,]=data2_sst
}
sstannavg = sstdata
indexgrid_sst = index1_sst
rm("data_sst") #remove the object data to clear up space
## write out the grid locations..
write(t(cbind(xygrid1_sst, indexgrid_sst)),file="data/4-NOAA-trop-sst-locs.txt",ncol=3)
sstlocs = read.table("data/4-NOAA-trop-sst-locs.txt")
xlong <- xgrid_sst
ylat <- ygrid_sst
nx_sst <- length(xlong)
ny_sst <- length(ylat)
for (i in 1:4) {
xcor = cor(rainpcs[,i], sstannavg)
zfull = rep(NaN,nx_sst*ny_sst)
zfull[sstlocs[,3]]=xcor
zmat = matrix(zfull,nrow=nx_sst,ncol=ny_sst)
image.plot(xlong,ylat,zmat,ylim=range(-16,16),col=myPalette(200),zlim=c(-0.5,0.5))
contour(xlong,ylat,(zmat),ylim=range(-16,16),add=TRUE,nlev=6,lwd=2)
title(main = paste("Correlation of Rainfall PC", i, "with SSTs"))
maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2)
grid()
}




Comments
PCA
PC1 explains about 14–15% of the total variance.The remaining variance is distributed across many modes. The first 6 PCs of explain 42% of the variance. This means PCA does not capture the variance in a single dominant mode. PC1 also shows the strongest correlation with the ENSO index (R = +0.475), suggesting that the leading mode is most strongly associated with ENSO-related variability.
Rotated PCA
The ENSO signal is spread across multiple rotated modes: Rotated PC1 shows a modest positive correlation (R = +0.247), while Rotated PC3 exhibits the strongest (negative) correlation with ENSO (R = –0.515).
Rotation improves the spatial localization of variability patterns, making them easier to interpret regionally. However, this comes at the cost of weakening the direct alignment of ENSO with a single dominant mode, as seen in the standard PCA.