--- title: "hw2_Prob5" author: "Haiying Peng" date: "2025-03-15" output: html_document --- #### 5. Repeat 5 by swapping summer rainfall with summer global tropical SSTs and vice-versa and, show correlation maps. ```{r} #### Clear Memory and Set Options rm(list=ls()) options(warn=-1) save <- FALSE # Change to TRUE if you want to save the plot # Set working directory (Modify if necessary) script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) setwd(script_dir) library(maps) library(akima) library(fields) library(RColorBrewer) myPalette <- colorRampPalette((brewer.pal(9, "RdBu")), space="Lab") # Blue high, red low ## read SST data nrows_sst <- 180 ncols_sst <- 17 ntime = 67 #Jun-Sep 1950 - 2016 nglobe_sst = nrows_sst * ncols_sst N_sst = nrows_sst * ncols_sst ### Lat - Long grid.. ygrid_sst = seq(-16,16,by=2) ny_sst =length(ygrid_sst) xgrid_sst=seq(0,358,by=2) nx_sst=length(xgrid_sst) xygrid_sst=matrix(0,nrow=nx_sst * ny_sst,ncol=2) i=0 for(iy in 1:ny_sst){ for(ix in 1:nx_sst){ i=i+1 xygrid_sst[i,1]=ygrid_sst[iy] xygrid_sst[i,2]=xgrid_sst[ix] } } data_sst=readBin("data/NOAA-Trop-JJAS-SST-1950-2016.r4",what="numeric", n=( nrows_sst * ncols_sst * ntime), size=4,endian="swap") data_sst <- array(data_sst, dim=c( nrows_sst, ncols_sst, ntime ) ) data1_sst=data_sst[,,1] # the lat -long data grid.. index_sst=1:(nx_sst*ny_sst) ## pull only data and the locations with non-missing data index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"] # only non-missing data. xygrid1_sst=xygrid_sst[index1_sst,] nsites_sst=length(index1_sst) data2_sst=data1_sst[index1_sst] ### SSTdata matrix - rows are seasonal (i.e. one value per year) ## and columns are locations sstdata=matrix(NA,nrow=ntime, ncol=nsites_sst) for(i in 1:ntime){ data1_sst=data_sst[,,i] index1_sst=index_sst[data1_sst < 20 & data1_sst != "NaN"] data2_sst=data1_sst[index1_sst] sstdata[i,]=data2_sst } sstannavg = sstdata indexgrid_sst = index1_sst rm("data_sst") #remove the object data to clear up space ## write out the grid locations.. write(t(cbind(xygrid1_sst, indexgrid_sst)),file="data/5-NOAA-trop-sst-locs.txt",ncol=3) ``` ##### (i)Perform a PCA ```{r} ###################### PCA ## #get variance matrix.. ## scale the data sstscale = scale(sstannavg) zs=var(sstscale) #do an Eigen decomposition.. zsvd=svd(zs) #Principal Components... sstpcs=t(t(zsvd$u) %*% t(sstscale)) #Eigen Values.. - fraction variance lambdas=(zsvd$d/sum(zsvd$d)) plot(1:25, lambdas[1:25], type="b", xlab="Modes", ylab="Frac. Var. explained") grid() print(sprintf("The first 4 PCs of explain %s%% of the variance", round(sum(lambdas[1:4])*100))) print(sprintf("The first 6 PCs of explain %s%% of the variance", round(sum(lambdas[1:6])*100))) ``` ```{r} #plots.. xlong = sort(unique(xygrid_sst[,2])) ylat = sort(unique(xygrid_sst[,1])) # Plot first 4 spatial eigenvectors for (i in 1:4) { zfull = rep(NaN,nglobe_sst) # zfull[indexgrid_sst]=zsvd$u[,2] zmat = matrix(zfull,nrow=nrows_sst,ncol=ncols_sst) image.plot(xlong,ylat,zmat,ylim=range(-16,16),col=myPalette(200)) contour(xlong,ylat,(zmat),ylim=range(-16,16),add=TRUE,nlev=6,lwd=2) maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2) title(paste("PCA Spatial Mode", i)) grid() } ``` ```{r} # Plot first 4 temporal PCs for (i in 1:4) { plot(1950:2016, sstpcs[,i],type="b",xlab="Year",ylab=paste("PC", i)) grid() } ``` ```{r} ## ENSO index nino4=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO4/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch") nino34=scan("http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO34/T/(Jan%201950)/(Dec%202016)/RANGE/T/(Jun-Sep)/seasonalAverage/data.ch") for (i in 1:4) { r_val <- round(cor(scale(sstpcs[,i]), nino34), 3) print(bquote(.(paste("Correlation between PC", i, "and ENSO index:")) ~ R^2 == .(round(r_val, 3)))) } ``` ##### Note that cor(nino34, sstpcs[,2]) gives the highest correlation meaning the second mode is ENSO ```{r} ## plot the PCs for (i in 1:4) { plot(1950:2016, scale(sstpcs[,i]),type="b",xlab="Year",ylab=paste("PC", i)) lines(1950:2016, nino34, col="red") grid() } ``` #### Now perform PCA on sstanrem ##### (ii)Rotate first six PCS ```{r} zrot = varimax(zsvd$u[,1:6],normalize=FALSE) sstrotpcs=t(t(zrot$loadings) %*% t(sstannavg)) ## plot the first 4 rotated spatial mode and temporal PCs xlong = sort(unique(xygrid_sst[,2])) ylat = sort(unique(xygrid_sst[,1])) par(mfrow = c(2, 2)) par(mar = c(3, 4, 2, 1)) for(i in 1:4){ zfull = rep(NaN,nglobe_sst) #also equal 72*36 zfull[indexgrid_sst]=zrot$loadings[,3] zmat = matrix(zfull,nrow=nrows_sst,ncol=ncols_sst) image.plot(xlong,ylat,zmat,ylim=range(-16,16),col=myPalette(200)) mtext(paste("Rotated EOF no.",i,sep=""), side = 3, line = 0.2, cex = 0.8) contour(xlong,ylat,(zmat),ylim=range(-16,16),add=TRUE,nlev=6,lwd=2) maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2) box() plot(1950:2016, scale(sstrotpcs[,i]),type="b",xlab="Year",ann=FALSE) mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8) } ``` #### Plot rotated PCs and ENSO ```{r} for (i in 1:4) { r_val <- round(cor(scale(sstrotpcs[,i]), nino34), 3) print(bquote(.(paste("the correlation between rotated PC no.", i, "and ENSO index:")) ~ R^2 == .(round(r_val, 3)))) } ``` ```{r} for (i in 1:4) { plot(1950:2016, scale(sstrotpcs[,i]),type="b",xlab="Year",ann=FALSE) mtext(paste("Rotated PC no.",i,sep=""), side = 3, line = 0.2, cex = 0.8) lines(1950:2016, nino34, col="red") } ``` ##### Comments ##### SST PCA The first mode explains nearly 40% of the total variance,indicating a strong dominant pattern in SST variability.The second mode explains nearly 20% of the total variance.The first 4 PCs of explain 73% of the variance. ##### Rotated SST PCA Both PCA and rotated PCA successfully extract the ENSO mode. Rotated PCA spreads ENSO variability over multiple components (esp. RPC2 and RPC3), which improves spatial localization but makes temporal interpretation slightly less direct. #### Correlated PCs of SSTs with Rain field ```{r} nrows <- 68 ncols <- 65 ntime <- 67 #Jun-Sep 1950 - 2016 nglobe <- nrows * ncols N = nrows * ncols ### Lat - Long grid.. ygrid=seq(6.5,38.5,by=0.5) ny=length(ygrid) xgrid=seq(66.5,100,by=0.5) nx=length(xgrid) xygrid=matrix(0,nrow=nx*ny,ncol=2) i=0 for(iy in 1:ny){ for(ix in 1:nx){ i=i+1 xygrid[i,1]=ygrid[iy] xygrid[i,2]=xgrid[ix] } } data=readBin("data/India-Rain-JJAS-05deg-1950-2016.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap") data <- array(data = data, dim=c( nrows, ncols, ntime ) ) data1=data[,,1] # the lat -long data grid.. index=1:(nx*ny) ## pull only data and the locations with non-missing data index1=index[data1 != "NaN"] # only non-missing data. xygrid1=xygrid[index1,] nsites=length(index1) data2=data1[index1] ### Rain data matrix - rows are seasonal (i.e. one value per year) ## and columns are locations raindata=matrix(NA,nrow=ntime, ncol=nsites) for(i in 1:ntime){ data1=data[,,i] index1=index[ data1 != "NaN"] data2=data1[index1] raindata[i,]=data2 } index = 1:dim(raindata)[2] xx = apply(raindata,2,mean) index2 = index1[xx > 0] index3 = index[xx > 0] xygrid1=xygrid[index2,] rainavg = raindata[,index3] indexgrid = index2 rm("data") #remove the object data to clear up space ## write out the grid locations.. write(t(cbind(xygrid1,indexgrid)),file="data/5-India-rain-locs.txt",ncol=3) ``` ```{r} rainlocs = read.table("data/5-India-rain-locs.txt") xlong = xgrid ylat = ygrid for (i in 1:4) { xcor = cor(sstpcs[,i], rainavg) zfull = rep(NaN,nx*ny) # zfull[rainlocs[,3]]=xcor zmat = matrix(zfull,nrow=nx,ncol=ny) image.plot(xlong,ylat,zmat,ylim=range(5,40),col=myPalette(200),zlim=c(-0.5,0.5)) contour(xlong,ylat,(zmat),ylim=range(5,40),add=TRUE,nlev=3,lwd=2) title(main = paste("Correlation of SST PC", i, "with rain field")) maps::map('world',wrap=c(0,360),add=TRUE,resolution=0,lwd=2) grid() } ```