Catalina Jerez
catalina.jerez@colorado.edu
We repeat Problem 4 by swapping summer rainfall with summer global tropical SSTs and vice-
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 543173 29.1 1201782 64.2 NA 700280 37.4
## Vcells 1004863 7.7 8388608 64.0 36864 1963567 15.0
rm(list = ls())
# Load necessary libraries
spatialAnalysis.Lib = c("fields", "maps", "rnaturalearth", "sf")
statisAnalysis.Lib = c("psych")
dataManipulation.Lib = c("dplyr", "reshape2", "tidyr", "tidyverse")
dataVisualization.Lib = c("ggplot2", "ggrepel", "ggpubr", "RColorBrewer")
list.packages = unique(c(spatialAnalysis.Lib, statisAnalysis.Lib,
dataManipulation.Lib, dataVisualization.Lib))
# Load all required packages
sapply(list.packages, require, character.only = TRUE)
## fields maps rnaturalearth sf psych
## TRUE TRUE TRUE TRUE TRUE
## dplyr reshape2 tidyr tidyverse ggplot2
## TRUE TRUE TRUE TRUE TRUE
## ggrepel ggpubr RColorBrewer
## TRUE TRUE TRUE
Below we define functions for plotting the eigenvalue spectrum, spatial patterns, temporal PC series, and SST fields.
# constants
text.size = 15
customize.bw = theme_bw(base_family = "Times") +
theme(
axis.text.x = element_text(size = text.size, vjust = 0.5),
axis.text.y = element_text(size = text.size),
axis.title = element_text(size = text.size + 2, face = "bold"),
plot.title = element_text(size = text.size + 2, face = "bold"),
legend.position = "bottom",
legend.title = element_text(size = text.size, face = "bold"),
legend.text = element_text(size = text.size),
panel.border = element_rect(color = "#000000", fill = NA, linewidth = 0.5),
panel.grid.minor.x = element_line(color = "#d9d9d9", linetype = "dotted"),
panel.grid.minor.y = element_line(color = "#d9d9d9", linetype = "dotted"),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()
)
# color palettes
palette.rdbu = colorRampPalette(rev(brewer.pal(9, "RdBu")))(200)
# world map
world.sf = ne_countries(scale = "medium", returnclass = "sf") %>%
st_wrap_dateline(options = c("WRAPDATELINE=YES", "DATELINEOFFSET=180")) %>%
st_transform("+proj=longlat +datum=WGS84")
# plot PCA
plot.eigenSpectrum = function(pca_result, n = 15) {
eigvals = pca_result$sdev^2
frac_var = eigvals / sum(eigvals)
df = data.frame(Mode = 1:length(frac_var), Variance = frac_var)
breaks = seq(0, 15, 3); breaks[1] = 1
ggplot(df[1:n, ], aes(x = Mode, y = Variance)) +
geom_line(color = "#636363") +
geom_point(color = "#000", fill = "#bdbdbd", shape = 21, size = 3) +
scale_x_continuous(breaks = breaks)+
labs(y = "Fraction Variance", x = "Mode",title = "PCA Eigenvalue Spectrum")+
theme_bw(base_size = 12, base_family = "Times") +
customize.bw
}
# plot spatial patterns
plot.spatialPattern = function(eigenvector, grid, title, fill_limits = c(-0.1, 0.1) ) {
df = bind_cols(grid, value = eigenvector)
colnames(df)[c(1,2)] = c("lat", "lon")
df = df %>% # shift data from 0–360 to -180–180
mutate(lon = ifelse(lon > 180, lon - 360, lon))
ggplot(df, aes(x = lon, y = lat, fill = value)) +
geom_tile() +
# regions
geom_rect(aes(xmin = 66, xmax = 100, ymin = 6, ymax = 39),
inherit.aes = FALSE, color = "#41ab5d", fill = NA, linewidth = 1) +
geom_sf(data = world.sf, fill = NA, color = "black",
linewidth = 0.2, inherit.aes = FALSE) +
# labs
labs(title = title, x = "Longitude", y = "Latitude") +
# scales
coord_sf(xlim = c(-180, 180), ylim = c(-20, 20), expand = FALSE, crs = "EPSG:4326") +
scale_fill_gradientn(colors = colorRampPalette(rev(brewer.pal(9, "RdBu")))(200),
limits = fill_limits,
guide = guide_colorbar(barwidth = 15, barheight = 2) ) +
scale_y_continuous(breaks = seq(-20, 20, 10))+
# theme
customize.bw
}
# plot temporal PC series + ENSO indices
plot.temporalPC = function(pc, indices, title) {
df = data.frame(Year = 1950:2016, PC = scale(pc))
df = cbind(df, indices)
# calculate correlation
cors = apply(df[, -c(1, 2)], 2, function(x) round(cor(df$PC, x, use = "complete.obs"), 2) )
cor.tx = paste0(paste(names(cors), cors, sep = " = "), collapse = ", ")
df.melt = reshape2::melt(df, id.vars = c("Year", "PC"),
variable.name = "Index",
value.name = "value")
df.melt = df.melt %>%
mutate(phase = case_when(value > 0.5 ~ "Warm phase",
value < -0.5 ~ "Cool phase",
TRUE ~ "Neutral phase"))
levels(df.melt$phase) = c("Cool phase", "Neutral phase","Warm phase")
# warm phase, cold phase, and neutral phase
scale.colors = c("#053061", "#00441b", "#b2182b")
scale.fill = c("#92c5de", "#a6dba0", "#f4a582")
ggplot(df.melt, aes(x = Year)) +
geom_hline(yintercept = 0, linetype = "solid", color = "#e0e0e0") +
geom_hline(yintercept = 2, linetype = "solid", color = "#67001f") +
geom_hline(yintercept =-2, linetype = "solid", color = "#053061") +
# ENSO indices
geom_col(aes(y = value, color = phase, fill = phase)) +
geom_line(aes(y = value), linewidth = .3, linetype = 1, color = "#67001f") +
# PC
geom_line(aes(y = PC), color = "#000", linewidth = .9) +
geom_point(aes(y = PC), color = "#000", fill = "#bdbdbd", shape = 21, size = 3) +
# labs
labs(title = paste0(title, "\n", cor.tx), y = "PC (scaled) / ENSO Index", x = "Year") +
facet_wrap(~ Index, ncol = 1, scales = "free_y") +
# scales
scale_x_continuous(limits = c(1950, 2015), breaks = seq(1950, 2015, 10)) +
scale_color_manual("Indice", values = scale.colors) +
scale_fill_manual("Indice", values = scale.fill) +
customize.bw +
theme(strip.text = element_text(size = text.size),
strip.background = element_blank())
}
# plot fields
plot.fields = function(pc, grd.avg, title){
r = apply(grd.avg, 2, function(x) cor(pc, x, use = "complete.obs"))
# significance (2-sided t-test)
n = nrow(grd.avg)
t = r * sqrt((n - 2) / (1 - r^2))
p = 2 * pt(-abs(t), df = n - 2)
sig = ifelse(p < 0.05, TRUE, FALSE)
# complete the grd
zcor = rep(NaN , nx.sst * ny.sst)
zsig = rep(FALSE, nx.sst * ny.sst)
zcor[sst.locs[, 3]] = r
zsig[sst.locs[, 3]] = sig
# grid for plot
df = expand.grid(lon = xgrid.sst, lat = ygrid.sst)
df$cor = zcor
df$sig = zsig
# pc.maps[[i]] =
ggplot(df, aes(x = lon, y = lat)) +
# correlation
geom_tile(aes(fill = cor)) +
geom_contour(aes(z = cor), color = "#000", bins = 6, linewidth = 0.5, na.rm = TRUE) +
# significance points
geom_point(data = df[df$sig,], aes(x = lon, y = lat), color = "#000", shape = 21, size = 0.6, stroke = 0.2) +
# regions
geom_rect(aes(xmin = 66, xmax = 100, ymin = 6, ymax = 39),
inherit.aes = FALSE, color = "#41ab5d", fill = NA, linewidth = 1) +
# borders("world", colour = "#000") +
geom_sf(data = world.sf, fill = NA, color = "black",
linewidth = 0.2, inherit.aes = FALSE) +
# labs
labs(title = title, x = "Longitude", y = "Latitude")+
# scales
scale_fill_gradientn(colors = colorRampPalette(rev(brewer.pal(9, "RdBu")))(200),
limits = c(-0.5, 0.5) ) +
guides(fill = guide_colorbar(barwidth = 15, barheight = 2) ) +
coord_sf(xlim = c(0, 358), ylim = c(-20, 20), expand = FALSE, crs = "EPSG:4326") +
scale_y_continuous(breaks = seq(-20, 20, 10))+
#theme
customize.bw
}
SST data for the tropical Pacific is loaded and reshaped. Only valid SST points are retained based on the provided locations.
path.data = "https://civil.colorado.edu/~balajir/CVEN6833/HWs/HW-2/Spring-2025-data-commands/"
# grid SST (NOAA tropical)
ntime = 67 # Jun-Sep 1950 - 2016
ygrid.sst = seq(-16, 16 , by = 2)
xgrid.sst = seq(0 , 358, by = 2)
ny.sst = length(ygrid.sst)
nx.sst = length(xgrid.sst)
# read SST valid points
sst.locs = read.table(paste0(path.data, "NOAA-trop-sst-locs.txt"))
# read SST binary
data.raw = readBin(paste0(path.data, "NOAA-Trop-JJAS-SST-1950-2016.r4"),
what = "numeric",
n = (nx.sst * ny.sst * ntime),
size = 4,
endian = "swap")
# reshape to (ny, nx, ntime)
sst.array = array(data.raw, dim = c(ny.sst, nx.sst, ntime))
# check the data
# sst.array = array(data.raw, dim = c(nx.sst, ny.sst, ntime))
# # Plot first time slice
# image(
# t(sst.array[,,1]),
# col = heat.colors(100),
# xlab = "Longitude",
# ylab = "Latitude",
# axes = FALSE
# )
# axis(1); axis(2)
# box()
sst.indices = sst.locs[, 3]
nsites = length(sst.indices)
# construct sst.avg (ntime rows × valid SST sites columns)
sst.avg = matrix(NA, nrow = ntime, ncol = nsites)
for (t in 1:ntime) {
sst.slice = sst.array[, , t]
sst.avg[t, ] = sst.slice[sst.indices]
}
nino.ind = c("NINO12","NINO3", "NINO34", "NINO4")
base.url = "http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED"
suffix.url = "/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch"
enso.list = lapply(nino.ind, function(ind){
scan(paste0(base.url, "/.", ind, suffix.url))
})
names(enso.list) = tolower(nino.ind)
enso.indices = as.data.frame(enso.list)
Rainfall data for JJAS from 1950 to 2016 is read and reshaped into a matrix where each row represents one season (year) and each column a spatial location. Only valid (non-missing) data are retained.
# Global grid
# deg grid
nrows = 68
ncols = 65
ntime = 67 # Jun-Sep 1950 - 2016
# Lat - Long grid
ygrid = seq(6.5 , 38.5, by=0.5)
xgrid = seq(66.5, 100 , by=0.5)
ny = length(ygrid)
nx = length(xgrid)
xygrid= matrix(0, nrow = nx*ny, ncol=2)
i = 0
for(iy in 1:ny){
for(ix in 1:nx){
i = i+1
xygrid[i,1] = ygrid[iy]
xygrid[i,2] = xgrid[ix]
}
}
# read India location valid points
rain.locs = read.table(paste0(path.data, "India-rain-locs.txt"))
# read data binary
data.raw = readBin(paste0(path.data, "India-Rain-JJAS-05deg-1950-2016.r4"),
what = "numeric",
n = (nx * ny * ntime),
size = 4,
endian = "swap")
# reshape to (ny, nx, ntime)
data.array = array(data.raw, dim = c(ny, nx, ntime))
rain.indices = rain.locs[, 3]
nsites = length(rain.indices)
# construct rain.avg (ntime rows × valid rain sites columns)
rain.avg = matrix(NA, nrow = ntime, ncol = nsites)
for (t in 1:ntime) {
rain.slice = data.array[, , t]
rain.avg[t, ] = rain.slice[rain.indices]
}
The SST data is scaled and PCA is performed to extract the dominant
modes of variability.
The eigenvalue spectrum helps us understand
how much variance each mode explains.
# perform PCA
sst.scale = scale(sst.avg) # scale the data
sst.pca = prcomp(sst.scale, center = TRUE, scale. = TRUE)
# plot eigenvalue spectrum (first 15 modes)
plot.eigenSpectrum(sst.pca)
The eigenvalue spectrum indicates that the first few PCs explain a significant portion of SST variability
\(\to\) PC1 accounts for dominant ENSO-like variability.
\(\to\) PC2–PC4 capture variations that are likely tied to Indian Ocean and western Pacific dynamics.
# plot leading 4 spatial patterns (EOFs)
eof.p = list()
for (i in 1:4) {
eof.p[[i]] = plot.spatialPattern(eigenvector = sst.pca$rotation[, i],
grid = sst.locs,
title = paste("EOF", i),
fill_limits = c(-0.05, 0.05))
}
ggarrange(eof.p[[1]], eof.p[[2]], eof.p[[3]], eof.p[[4]],
common.legend = TRUE,
legend = "bottom",
ncol = 1)
\(\to\) EOF1:
central/eastern Pacific warming (classic El Niño pattern)
\(\to\) EOF2–EOF4: display east-west gradients and complex tropical SST structures (e.g., IOD).
# plot leading 4 temporal patterns (PCs)
pc.temp = list()
for (i in 1:4) {
pc.temp[[i]] = plot.temporalPC(pc = sst.pca$x[, i],
indices = enso.indices,
title = paste("Principal Component", i)
)
}
pc.temp[[1]]
pc.temp[[2]]
pc.temp[[3]]
pc.temp[[4]]
# ggarrange(pc.temp[[1]], pc.temp[[2]], pc.temp[[3]], pc.temp[[4]], ncol = 1)
\(\to\) PC1: strong positive correlation with Niño indices, especially Niño4 (r = 0.62) and Niño3.4 (r = 0.47). Both indices captures canonical ENSO signal.
\(\to\) PC1: strong negative correlations (e.g., Niño3.4 = −0.83, possibly La Niña signal)
\(\to\) PC3 & PC4: weak/mixed correlations, indicating influence of less canonical SST patterns or internal oceanic modes.
Rotation is applied to enhance the physical interpretability of the spatial patterns. The varimax rotation redistributes the explained variance so that the resulting patterns (rotated EOFs) are more localized.
pc.data = sst.pca$x[, 1:6]
sst.rot = principal(pc.data, nfactors = 6, rotate = "varimax", scores = TRUE)
rot.eof = list()
for (i in 1:4) {
# project back spatially: rotated spatial = rotation * loading
rotated_spatial = as.matrix(sst.pca$rotation[, 1:6]) %*% as.matrix(sst.rot$loadings[, i])
rot.eof[[i]] = plot.spatialPattern(eigenvector = rotated_spatial,
grid = sst.locs,
title = paste("Rotated EOF", i),
fill_limits = c(-0.05, 0.05))
}
ggarrange(rot.eof[[1]], rot.eof[[2]], rot.eof[[3]], rot.eof[[4]],
common.legend = TRUE,
legend = "bottom",
ncol = 1)
rot.pc = list()
for (i in 1:4) {
rot.pc[[i]] = plot.temporalPC(pc = sst.rot$scores[, i],
indices = enso.indices,
title = paste("Rotated Principal Component", i)
)
}
rot.pc[[1]]
rot.pc[[2]]
rot.pc[[3]]
rot.pc[[4]]
# ggarrange(rot.pc[[1]], rot.pc[[2]], rot.pc[[3]], rot.pc[[4]],
# ncol = 1)
Rotated EOFs localize SST anomaly patterns, where:
\(\to\) Rotated EOF1: Strong negative ENSO signature.
\(\to\) Rotated EOFs 2-4: Appear to capture subtropical and Indian Ocean signals.
SST PCA effectively captured ENSO and IOD-related SST modes.
Rainfall correlations validate the strong ENSO-monsoon linkage, consistent with Problem 4.
Reversing the PCA direction confirms the bidirectional coupling between SSTs and Indian monsoon rainfall patterns.