Catalina Jerez
catalina.jerez@colorado.edu


We repeat Problem 4 by swapping summer rainfall with summer global tropical SSTs and vice-

1 Library & functions

1.1 Libraries

gc()
##           used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells  543173 29.1    1201782 64.2         NA   700280 37.4
## Vcells 1004863  7.7    8388608 64.0      36864  1963567 15.0
rm(list = ls())

# Load necessary libraries
spatialAnalysis.Lib     = c("fields", "maps", "rnaturalearth", "sf")
statisAnalysis.Lib      = c("psych")
dataManipulation.Lib    = c("dplyr", "reshape2", "tidyr", "tidyverse") 
dataVisualization.Lib   = c("ggplot2", "ggrepel", "ggpubr", "RColorBrewer")
list.packages           = unique(c(spatialAnalysis.Lib, statisAnalysis.Lib,
                                   dataManipulation.Lib, dataVisualization.Lib))
# Load all required packages
sapply(list.packages, require, character.only = TRUE)
##        fields          maps rnaturalearth            sf         psych 
##          TRUE          TRUE          TRUE          TRUE          TRUE 
##         dplyr      reshape2         tidyr     tidyverse       ggplot2 
##          TRUE          TRUE          TRUE          TRUE          TRUE 
##       ggrepel        ggpubr  RColorBrewer 
##          TRUE          TRUE          TRUE

1.2 Functions

Below we define functions for plotting the eigenvalue spectrum, spatial patterns, temporal PC series, and SST fields.

# constants 
text.size    = 15
customize.bw = theme_bw(base_family = "Times") +
  theme(
    axis.text.x        = element_text(size = text.size, vjust = 0.5),
    axis.text.y        = element_text(size = text.size),
    axis.title         = element_text(size = text.size + 2, face = "bold"),
    plot.title         = element_text(size = text.size + 2, face = "bold"),
    
    legend.position    = "bottom",
    legend.title       = element_text(size = text.size, face = "bold"),
    legend.text        = element_text(size = text.size),
    
    panel.border       = element_rect(color = "#000000", fill = NA, linewidth = 0.5),
    panel.grid.minor.x = element_line(color = "#d9d9d9", linetype = "dotted"),
    panel.grid.minor.y = element_line(color = "#d9d9d9", linetype = "dotted"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank()
  )
# color palettes
palette.rdbu = colorRampPalette(rev(brewer.pal(9, "RdBu")))(200)
# world map
world.sf     = ne_countries(scale = "medium", returnclass = "sf") %>%
    st_wrap_dateline(options = c("WRAPDATELINE=YES", "DATELINEOFFSET=180")) %>%
    st_transform("+proj=longlat +datum=WGS84")

#  plot PCA
plot.eigenSpectrum  = function(pca_result, n = 15) {
  eigvals  = pca_result$sdev^2
  frac_var = eigvals / sum(eigvals)
  df       = data.frame(Mode = 1:length(frac_var), Variance = frac_var)
  breaks   = seq(0, 15, 3); breaks[1] = 1
  ggplot(df[1:n, ], aes(x = Mode, y = Variance)) +
    geom_line(color = "#636363") + 
    geom_point(color = "#000", fill = "#bdbdbd", shape = 21, size = 3) +
    scale_x_continuous(breaks = breaks)+
    labs(y = "Fraction Variance", x = "Mode",title = "PCA Eigenvalue Spectrum")+
    theme_bw(base_size = 12, base_family = "Times") + 
    customize.bw
}

# plot spatial patterns
plot.spatialPattern = function(eigenvector, grid, title, fill_limits = c(-0.1, 0.1) ) {
  df                   = bind_cols(grid, value = eigenvector)
  colnames(df)[c(1,2)] = c("lat", "lon")
  df                   = df %>% # shift data from 0–360 to -180–180
    mutate(lon = ifelse(lon > 180, lon - 360, lon))
  
  ggplot(df, aes(x = lon, y = lat, fill = value)) +
    geom_tile() +
   # regions
    geom_rect(aes(xmin = 66, xmax = 100, ymin = 6, ymax = 39),
              inherit.aes = FALSE, color = "#41ab5d", fill = NA, linewidth = 1) +
    geom_sf(data = world.sf, fill = NA, color = "black", 
            linewidth = 0.2, inherit.aes = FALSE) +
    # labs
    labs(title = title, x = "Longitude", y = "Latitude") +
    # scales
    coord_sf(xlim = c(-180, 180), ylim = c(-20, 20), expand = FALSE, crs = "EPSG:4326") +
    scale_fill_gradientn(colors = colorRampPalette(rev(brewer.pal(9, "RdBu")))(200), 
                         limits = fill_limits,
                         guide  = guide_colorbar(barwidth = 15, barheight = 2) ) +
    scale_y_continuous(breaks = seq(-20, 20, 10))+
    # theme 
    customize.bw
}

# plot temporal PC series + ENSO indices
plot.temporalPC = function(pc, indices, title) {
  df      = data.frame(Year = 1950:2016, PC = scale(pc))
  df      = cbind(df, indices)
  # calculate correlation
  cors    = apply(df[, -c(1, 2)], 2, function(x) round(cor(df$PC, x, use = "complete.obs"), 2) )
  cor.tx  = paste0(paste(names(cors), cors, sep = " = "), collapse = ", ")
  df.melt = reshape2::melt(df, id.vars  = c("Year", "PC"), 
                           variable.name = "Index", 
                           value.name    = "value")
  df.melt = df.melt %>%
    mutate(phase = case_when(value > 0.5  ~ "Warm phase",
                             value < -0.5 ~ "Cool phase",
                             TRUE         ~ "Neutral phase"))
  
  levels(df.melt$phase) = c("Cool phase", "Neutral phase","Warm phase")
  # warm phase, cold phase, and neutral phase
  scale.colors = c("#053061", "#00441b", "#b2182b")
  scale.fill   = c("#92c5de", "#a6dba0", "#f4a582")
  
  ggplot(df.melt, aes(x = Year)) +
    geom_hline(yintercept = 0, linetype = "solid", color = "#e0e0e0") +
    geom_hline(yintercept = 2, linetype = "solid", color = "#67001f") +
    geom_hline(yintercept =-2, linetype = "solid", color = "#053061") +
    
    # ENSO indices
    geom_col(aes(y = value, color = phase, fill = phase)) + 
    geom_line(aes(y = value), linewidth = .3, linetype = 1, color = "#67001f") +
    # PC 
    geom_line(aes(y = PC), color = "#000", linewidth = .9) +
    geom_point(aes(y = PC), color = "#000", fill = "#bdbdbd", shape = 21, size = 3) +
    
    # labs 
    labs(title = paste0(title, "\n", cor.tx), y = "PC (scaled) / ENSO Index", x = "Year") +
    facet_wrap(~ Index, ncol = 1, scales = "free_y") +
    # scales
    scale_x_continuous(limits = c(1950, 2015), breaks = seq(1950, 2015, 10)) + 
    scale_color_manual("Indice", values = scale.colors) + 
    scale_fill_manual("Indice", values = scale.fill) + 
    customize.bw + 
    theme(strip.text = element_text(size = text.size),
          strip.background = element_blank())
}

# plot fields 
plot.fields = function(pc, grd.avg, title){
  r   = apply(grd.avg, 2, function(x) cor(pc, x, use = "complete.obs"))
  
  # significance (2-sided t-test)
  n   = nrow(grd.avg)
  t   = r * sqrt((n - 2) / (1 - r^2))
  p   = 2 * pt(-abs(t), df = n - 2)
  sig = ifelse(p < 0.05, TRUE, FALSE)
  
  # complete the grd
  zcor     = rep(NaN  , nx.sst * ny.sst)
  zsig     = rep(FALSE, nx.sst * ny.sst)
  zcor[sst.locs[, 3]] = r
  zsig[sst.locs[, 3]] = sig
  
  # grid for plot
  df     = expand.grid(lon = xgrid.sst, lat = ygrid.sst)
  df$cor = zcor
  df$sig = zsig
  
  # pc.maps[[i]] = 
  ggplot(df, aes(x = lon, y = lat)) +
    # correlation 
    geom_tile(aes(fill = cor)) +
    geom_contour(aes(z = cor), color = "#000", bins = 6, linewidth = 0.5, na.rm = TRUE) +
    # significance points 
    geom_point(data = df[df$sig,], aes(x = lon, y = lat), color = "#000", shape = 21, size = 0.6, stroke = 0.2) +
    # regions
    geom_rect(aes(xmin = 66, xmax = 100, ymin = 6, ymax = 39),
              inherit.aes = FALSE, color = "#41ab5d", fill = NA, linewidth = 1) +
    # borders("world", colour = "#000") +
    geom_sf(data = world.sf, fill = NA, color = "black", 
            linewidth = 0.2, inherit.aes = FALSE) +
    # labs
    labs(title = title, x = "Longitude", y = "Latitude")+
    # scales
    scale_fill_gradientn(colors = colorRampPalette(rev(brewer.pal(9, "RdBu")))(200), 
                         limits = c(-0.5, 0.5) ) +
    guides(fill  = guide_colorbar(barwidth = 15, barheight = 2) ) +
    coord_sf(xlim = c(0, 358), ylim = c(-20, 20), expand = FALSE, crs = "EPSG:4326") +
    scale_y_continuous(breaks = seq(-20, 20, 10))+
    #theme
    customize.bw
}

2 Load data and pre-process

2.1 Sea Surface Temperature fields

SST data for the tropical Pacific is loaded and reshaped. Only valid SST points are retained based on the provided locations.

path.data = "https://civil.colorado.edu/~balajir/CVEN6833/HWs/HW-2/Spring-2025-data-commands/"
# grid SST (NOAA tropical)
ntime     = 67    # Jun-Sep 1950 - 2016
ygrid.sst = seq(-16, 16 , by = 2)
xgrid.sst = seq(0  , 358, by = 2)
ny.sst    = length(ygrid.sst)
nx.sst    = length(xgrid.sst)

# read SST valid points
sst.locs = read.table(paste0(path.data, "NOAA-trop-sst-locs.txt"))
# read SST binary
data.raw = readBin(paste0(path.data, "NOAA-Trop-JJAS-SST-1950-2016.r4"),
                       what   = "numeric",
                       n      = (nx.sst * ny.sst * ntime),
                       size   = 4,
                       endian = "swap")
# reshape to (ny, nx, ntime)
sst.array   = array(data.raw, dim = c(ny.sst, nx.sst, ntime))

# check the data 
# sst.array   = array(data.raw, dim = c(nx.sst, ny.sst, ntime))
# # Plot first time slice
# image(
#   t(sst.array[,,1]),
#   col = heat.colors(100),
#   xlab = "Longitude",
#   ylab = "Latitude",
#   axes = FALSE
# )
# axis(1); axis(2)
# box()


sst.indices = sst.locs[, 3]
nsites      = length(sst.indices)

# construct sst.avg (ntime rows × valid SST sites columns)
sst.avg     = matrix(NA, nrow = ntime, ncol = nsites)
for (t in 1:ntime) {
  sst.slice    = sst.array[, , t]
  sst.avg[t, ] = sst.slice[sst.indices]
}

2.2 Indices ENSO EXTENDED

nino.ind   = c("NINO12","NINO3", "NINO34", "NINO4")
base.url   = "http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED"
suffix.url = "/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch"
enso.list  = lapply(nino.ind, function(ind){
  scan(paste0(base.url, "/.", ind, suffix.url))
})

names(enso.list) = tolower(nino.ind)
enso.indices     = as.data.frame(enso.list)

2.3 Rajeevan Gridded rainfall

Rainfall data for JJAS from 1950 to 2016 is read and reshaped into a matrix where each row represents one season (year) and each column a spatial location. Only valid (non-missing) data are retained.

# Global grid
# deg grid
nrows = 68
ncols = 65
ntime = 67    # Jun-Sep 1950 - 2016

# Lat - Long grid
ygrid = seq(6.5 , 38.5, by=0.5)
xgrid = seq(66.5, 100 , by=0.5)
ny    = length(ygrid)
nx    = length(xgrid)
xygrid= matrix(0, nrow = nx*ny, ncol=2)
i = 0
for(iy in 1:ny){
  for(ix in 1:nx){
  i           = i+1
  xygrid[i,1] = ygrid[iy]
  xygrid[i,2] = xgrid[ix]
  }
}

# read India location valid points
rain.locs = read.table(paste0(path.data, "India-rain-locs.txt"))
# read data binary
data.raw = readBin(paste0(path.data, "India-Rain-JJAS-05deg-1950-2016.r4"),
                       what   = "numeric",
                       n      = (nx * ny * ntime),
                       size   = 4,
                       endian = "swap")
# reshape to (ny, nx, ntime)
data.array   = array(data.raw, dim = c(ny, nx, ntime))
rain.indices = rain.locs[, 3]
nsites       = length(rain.indices)

# construct rain.avg (ntime rows × valid rain sites columns)
rain.avg     = matrix(NA, nrow = ntime, ncol = nsites)
for (t in 1:ntime) {
  rain.slice    = data.array[, , t]
  rain.avg[t, ] = rain.slice[rain.indices]
}

3 Principal Component Analysis (PCA)

The SST data is scaled and PCA is performed to extract the dominant modes of variability.
The eigenvalue spectrum helps us understand how much variance each mode explains.

# perform PCA
sst.scale = scale(sst.avg) # scale the data
sst.pca   = prcomp(sst.scale, center = TRUE, scale. = TRUE)
# plot eigenvalue spectrum (first 15 modes)
plot.eigenSpectrum(sst.pca)

The eigenvalue spectrum indicates that the first few PCs explain a significant portion of SST variability

\(\to\) PC1 accounts for dominant ENSO-like variability.

\(\to\) PC2–PC4 capture variations that are likely tied to Indian Ocean and western Pacific dynamics.

3.1 Plotting the Leading 4 Spatial Patterns (EOFs)

# plot leading 4 spatial patterns (EOFs)
eof.p   = list()
for (i in 1:4) {
 eof.p[[i]] = plot.spatialPattern(eigenvector = sst.pca$rotation[, i], 
                                  grid        = sst.locs, 
                                  title       = paste("EOF", i),
                                  fill_limits = c(-0.05, 0.05))
}
ggarrange(eof.p[[1]], eof.p[[2]], eof.p[[3]], eof.p[[4]], 
          common.legend = TRUE,
          legend        = "bottom",
          ncol          = 1)

\(\to\) EOF1: central/eastern Pacific warming (classic El Niño pattern)

\(\to\) EOF2–EOF4: display east-west gradients and complex tropical SST structures (e.g., IOD).

3.2 Plotting the Leading 4 Temporal Patterns (PCs)

# plot leading 4 temporal patterns (PCs)
pc.temp = list()
for (i in 1:4) {
  pc.temp[[i]] = plot.temporalPC(pc      = sst.pca$x[, i], 
                                 indices = enso.indices,
                                 title   = paste("Principal Component", i)
                                 )
}
pc.temp[[1]]

pc.temp[[2]]

pc.temp[[3]]

pc.temp[[4]]

# ggarrange(pc.temp[[1]], pc.temp[[2]], pc.temp[[3]], pc.temp[[4]], ncol = 1)

\(\to\) PC1: strong positive correlation with Niño indices, especially Niño4 (r = 0.62) and Niño3.4 (r = 0.47). Both indices captures canonical ENSO signal.

\(\to\) PC1: strong negative correlations (e.g., Niño3.4 = −0.83, possibly La Niña signal)

\(\to\) PC3 & PC4: weak/mixed correlations, indicating influence of less canonical SST patterns or internal oceanic modes.

4 Rotated PCA (Varimax)

Rotation is applied to enhance the physical interpretability of the spatial patterns. The varimax rotation redistributes the explained variance so that the resulting patterns (rotated EOFs) are more localized.

pc.data = sst.pca$x[, 1:6]
sst.rot = principal(pc.data, nfactors = 6, rotate = "varimax", scores = TRUE)

4.1 Plotting Rotated Spatial Patterns

rot.eof  = list()
for (i in 1:4) {
  # project back spatially: rotated spatial = rotation * loading
  rotated_spatial = as.matrix(sst.pca$rotation[, 1:6]) %*% as.matrix(sst.rot$loadings[, i])
  rot.eof[[i]]    = plot.spatialPattern(eigenvector = rotated_spatial, 
                                        grid        = sst.locs, 
                                        title       = paste("Rotated EOF", i),
                                        fill_limits = c(-0.05, 0.05))
}
ggarrange(rot.eof[[1]], rot.eof[[2]], rot.eof[[3]], rot.eof[[4]], 
          common.legend = TRUE,
          legend = "bottom",
          ncol = 1)

4.2 Plotting Rotated Temporal Patterns

rot.pc = list()
for (i in 1:4) {
  rot.pc[[i]] = plot.temporalPC(pc      = sst.rot$scores[, i], 
                                indices = enso.indices,
                                title   = paste("Rotated Principal Component", i)
                                 )
}
rot.pc[[1]]

rot.pc[[2]]

rot.pc[[3]]

rot.pc[[4]]

# ggarrange(rot.pc[[1]], rot.pc[[2]], rot.pc[[3]], rot.pc[[4]], 
#           ncol = 1)

4.3 Discussion on Rotated PCA

Rotated EOFs localize SST anomaly patterns, where:

\(\to\) Rotated EOF1: Strong negative ENSO signature.

\(\to\) Rotated EOFs 2-4: Appear to capture subtropical and Indian Ocean signals.

5 Correlated PCs with Rain fields

Finally, we examine how the rainfall modes are linked to SST anomalies. For each of the first four PCs, we plot the correlation with the rainfall at India and mark statistically significant regions (p < 0.05).

rain.maps = list()
for (i in 1:4) {
  pc.i           = sst.pca$x[, i]
  pc.title       = paste("SST PC", i, "vs Rainfall (r & p < 0.05)")
  rain.maps[[i]] = plot.fields(pc      = pc.i, 
                               grd.avg = rain.avg,
                               title   = pc.title)
}
ggarrange(rain.maps[[1]], rain.maps[[2]], rain.maps[[3]], rain.maps[[4]], 
          ncol          = 1,
          common.legend = TRUE,
          legend        = "bottom")

SST correlation analysis

\(\to\) PC1: significant negative correlation over core monsoon zone (El Niño suppresses rainfall).

\(\to\) PC2: positive correlation, possibly reflects La Niña enhancing monsoon rainfall.

\(\to\) PC3 mixed/weak signals, indicates lesser influence.

\(\to\) PC4: very weak or insignificant correlations.

6 Final remarks

  1. SST PCA effectively captured ENSO and IOD-related SST modes.

  2. Rainfall correlations validate the strong ENSO-monsoon linkage, consistent with Problem 4.

  3. Reversing the PCA direction confirms the bidirectional coupling between SSTs and Indian monsoon rainfall patterns.