--- title: "hw2_Prob4" author: "Haiying Peng" date: "2025-03-15" output: html_document --- #### 4. Summer season (June-September total) rainfall over India exhibits significant spatial and temporal variability. You wish to identify the space-time patterns of variability of. To this end, (i) Perform a PCA on the summer season rainfall (cm) on a 0.5o x 0.5o grid (i.e. the ‘Rajeevan grid’) over India is available for the period 1901 to 2016. Perform the analysis for the 1950 ~ 2016 period --Plot the Eigen variance spectrum for the first 25 modes --Plot the leading 4 spatial (Eigen vectors) and temporal (PCs) modes of variability (ii) Perform a rotated PCA (rotate the first 6 PCs) and plot the leading 4 spatial and temporal modes of variability. Compare the results with (i) above. (iii) Correlate the first four PCs (uncorrelated) of rainfall with summer global tropical Sea Surface Temperatures (SSTs) and show correlation maps. ```{r} # Load libraries library(maps) library(akima) library(fields) library(RColorBrewer) #### Clear Memory and Set Options rm(list=ls()) options(warn=-1) save <- FALSE # Change to TRUE if you want to save the plot # Set working directory (Modify if necessary) script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) setwd(script_dir) # Color palette myPalette <- colorRampPalette(rev(brewer.pal(9, "RdBu")), space="Lab") ##0.25 deg grid #nrows <- 135 #ncols <- 129 ### 0.5 deg grid nrows=68 ncols=65 ntime <- 67 #Jun-Sep 1950 - 2016 nyrs <- ntime nglobe <- nrows*ncols N <- nrows*ncols ### Lat - Long grid.. #ygrid=seq(6.5,38.5,by=0.25) ygrid=seq(6.5,38.5,by=0.5) ny=length(ygrid) #xgrid=seq(66.5,100,by=0.25) xgrid=seq(66.5,100,by=0.5) nx=length(xgrid) xygrid=matrix(0,nrow=nx*ny,ncol=2) i=0 for(iy in 1:ny){ for(ix in 1:nx){ i=i+1 xygrid[i,1]=ygrid[iy] xygrid[i,2]=xgrid[ix] } } # Read India Gridded Rainfall data.. data=readBin("data/India-Rain-JJAS-05deg-1950-2016.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap") #data=readBin("data.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap") data <- array(data = data, dim=c( nrows, ncols, ntime ) ) data1=data[,,1] # the lat -long data grid.. index=1:(nx*ny) ## pull only data and the locations with non-missing data index1=index[data1 != "NaN"] # only non-missing data. xygrid1=xygrid[index1,] nsites=length(index1) data2=data1[index1] ### Rain data matrix - rows are seasonal (i.e. one value per year) ## and columns are locations raindata=matrix(NA,nrow=nyrs, ncol=nsites) for(i in 1:nyrs){ data1=data[,,i] index1=index[ data1 != "NaN"] data2=data1[index1] raindata[i,]=data2 } index = 1:dim(raindata)[2] xx = apply(raindata,2,mean) index2 = index1[xx > 0] index3 = index[xx > 0] xygrid1=xygrid[index2,] rainavg = raindata[,index3] indexgrid = index2 rm("data") #remove the object data to clear up space ## write out the grid locations.. write(t(cbind(xygrid1,indexgrid)),file="data/4-India-rain-locs.txt",ncol=3) ``` ##### (i)Perform a PCA ```{r} ###################### PCA ## # Standardize and perform PCA rainscale = scale(rainavg) zs=var(rainscale) #do an Eigen decomposition.. zsvd=svd(zs) #Principal Components... rainpcs=t(t(zsvd$u) %*% t(rainscale)) #Eigen Values.. - fraction variance lambdas=(zsvd$d/sum(zsvd$d)) plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. explained") #first 25 models grid() print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100))) print(sprintf("The first 6 PCs of explain %s%% of the variance", round(sum(lambdas[1:6])*100))) ``` ```{r} #plots.. # the data is on a grid so fill the entire global grid with NaN and then populate the ocean grids with the Eigen vector xlong = sort(unique(xygrid[,2])) ylat = sort(unique(xygrid[,1])) # Plot first 4 spatial eigenvectors for (k in 1:4) { zfull = rep(NaN, nglobe) zfull[indexgrid] = zsvd$u[,k] zmat = matrix(zfull, nrow=nrows, ncol=ncols) image.plot(xlong, ylat, zmat, ylim=range(5,40), col=myPalette(200)) contour(xlong, ylat, zmat, ylim=range(5,40), add=TRUE, nlev=3, lwd=2) maps::map('world', wrap=c(0,360), add=TRUE, resolution=0, lwd=2) title(paste("PCA Spatial Mode", k)) grid() } ``` ```{r} # Plot first 4 temporal PCs for (k in 1:4) { plot(1950:2016, rainpcs[,k], type="b", xlab="Year", ylab=paste("PC", k)) grid() } ``` ```{r,message=FALSE, warning=FALSE} ## ENSO index nino4=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO4/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch") nino34=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO34/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch") for (k in 1:4) { r_val <- round(cor(scale(rainpcs[,k]), nino34), 3) print(bquote(.(paste("Correlation between PC", k, "and ENSO index:")) ~ R^2 == .(round(r_val, 3)))) } ``` ##### Note that cor(nino34, pcs[,1]) gives the highest correlation meaning the first mode is ENSO ```{r} ## plot the PCs ## PC1 will be linked to ENSO for (k in 1:4) { plot(1950:2016, scale(rainpcs[,k]),type="b",xlab="Year",ylab=paste("PC", k)) lines(1950:2016, nino34, col="red") grid() } ``` ##### (ii)Rotate first six PCS ```{r} zrot = varimax(zsvd$u[,1:6],normalize=FALSE) rainrotpcs=t(t(zrot$loadings) %*% t(rainavg)) ## plot the first 4 rotated spatial mode xlong = sort(unique(xygrid[,2])) ylat = sort(unique(xygrid[,1])) par(mfrow = c(2, 2)) par(mar = c(3, 4, 2, 1)) for(i in 1:4){ zfull = rep(NaN,nglobe) zfull[indexgrid]=zrot$loadings[,i] zmat = matrix(zfull,nrow=nrows,ncol=ncols) image.plot(xlong,ylat,zmat,ylim=range(5,40),col=myPalette(200)) mtext(paste("Rotated EOF no.",i,sep=""), side = 3, line = 0.2, cex = 0.8) contour(xlong,ylat,(zmat),ylim=range(5,40),add=TRUE,nlev=3,lwd=2) maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2) box() plot(1950:2016, scale(rainrotpcs[,i]),type="b",xlab="Year",ann=FALSE) mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8) } ``` #### Plot rotated PCs and ENSO ```{r} for (i in 1:4) { r_val <- round(cor(scale(rainrotpcs[,i]), nino34), 3) print(bquote(.(paste("the correlation between rotated PC no.", k, "and ENSO index:")) ~ R^2 == .(round(r_val, 3)))) } for (i in 1:4) { plot(1950:2016, scale(rainrotpcs[,i]),type="b",xlab="Year",ann=FALSE) mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8) lines(1950:2016, nino34, col="red") } ``` ##### Comments ###### PCA PC1 explains about 14–15% of the total variance.The remaining variance is distributed across many modes. The first 6 PCs of explain 42% of the variance. This means PCA does not capture the variance in a single dominant mode. PC1 also shows the strongest correlation with the ENSO index (R = +0.475), suggesting that the leading mode is most strongly associated with ENSO-related variability. ###### Rotated PCA The ENSO signal is spread across multiple rotated modes: Rotated PC1 shows a modest positive correlation (R = +0.247), while Rotated PC3 exhibits the strongest (negative) correlation with ENSO (R = –0.515). Rotation improves the spatial localization of variability patterns, making them easier to interpret regionally. However, this comes at the cost of weakening the direct alignment of ENSO with a single dominant mode, as seen in the standard PCA. ##### (iii)Correlated PCs with SST fields ```{r} nrows_sst <- 180 ncols_sst <- 17 ntime = 67 #Jun-Sep 1950 - 2016 nglobe_sst = nrows_sst * ncols_sst N_sst = nrows_sst * ncols_sst ### Lat - Long grid.. ygrid_sst = seq(-16,16,by=2) ny_sst =length(ygrid_sst) xgrid_sst=seq(0,358,by=2) nx_sst=length(xgrid_sst) xygrid_sst=matrix(0,nrow=nx_sst * ny_sst,ncol=2) i=0 for(iy in 1:ny_sst){ for(ix in 1:nx_sst){ i=i+1 xygrid_sst[i,1]=ygrid_sst[iy] xygrid_sst[i,2]=xgrid_sst[ix] } } data_sst=readBin("data/NOAA-Trop-JJAS-SST-1950-2016.r4",what="numeric", n=( nrows_sst * ncols_sst * ntime), size=4,endian="swap") data_sst <- array(data_sst, dim=c( nrows_sst, ncols_sst, ntime ) ) data1_sst=data_sst[,,1] # the lat -long data grid.. index_sst=1:(nx_sst*ny_sst) ## pull only data and the locations with non-missing data index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"] # only non-missing data. xygrid1_sst=xygrid_sst[index1_sst,] nsites_sst=length(index1_sst) data2_sst=data1_sst[index1_sst] ### SSTdata matrix - rows are seasonal (i.e. one value per year) ## and columns are locations sstdata=matrix(NA,nrow=nyrs, ncol=nsites_sst) for(i in 1:nyrs){ data1_sst=data_sst[,,i] index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"] data2_sst=data1_sst[index1_sst] sstdata[i,]=data2_sst } sstannavg = sstdata indexgrid_sst = index1_sst rm("data_sst") #remove the object data to clear up space ## write out the grid locations.. write(t(cbind(xygrid1_sst, indexgrid_sst)),file="data/4-NOAA-trop-sst-locs.txt",ncol=3) ``` ```{r} sstlocs = read.table("data/4-NOAA-trop-sst-locs.txt") xlong <- xgrid_sst ylat <- ygrid_sst nx_sst <- length(xlong) ny_sst <- length(ylat) for (i in 1:4) { xcor = cor(rainpcs[,i], sstannavg) zfull = rep(NaN,nx_sst*ny_sst) zfull[sstlocs[,3]]=xcor zmat = matrix(zfull,nrow=nx_sst,ncol=ny_sst) image.plot(xlong,ylat,zmat,ylim=range(-16,16),col=myPalette(200),zlim=c(-0.5,0.5)) contour(xlong,ylat,(zmat),ylim=range(-16,16),add=TRUE,nlev=6,lwd=2) title(main = paste("Correlation of Rainfall PC", i, "with SSTs")) maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2) grid() } ```