# Kaplan SST preprocessing ----
# Builds the lat/long grid, reads the binary SST field, keeps the grid cells
# flagged as valid in each month, assembles the month-by-site matrix, reduces
# it to annual means and computes the site-by-site covariance matrix that the
# PCA step consumes.

# Grid and record dimensions ----
nrows <- 72
ncols <- 36
ntime <- 1341 # Jan 1900 - Sep 2011
ntimep <- 1332 # Jan 1900 - Dec 2010
nglobe <- nrows * ncols
nyrs <- ntimep / 12 # 1900 - 2010
N <- nrows * ncols

# Lat - long grid ----
# NOTE(review): `locs` is not referenced anywhere else in the visible script;
# it is kept because later code may rely on it. Requires network access.
locs <- matrix(
  scan("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-4HW2/sst-lat-long.txt"),
  ncol = 2, byrow = TRUE
)

ygrid <- seq(-87.5, 87.5, by = 5)
ny <- length(ygrid)

# Longitudes are left on the 27.5 - 382.5 grid. The original author kept an
# optional wrap for values above 180, disabled here as in the original:
# xgrid[xgrid > 180] <- xgrid[xgrid > 180] - 360
xgrid <- seq(27.5, 382.5, by = 5)
nx <- length(xgrid)

# One row per grid cell: column 1 = latitude, column 2 = longitude.
# Longitude varies fastest, matching the column-major layout of each
# nrows x ncols time slice read below.
xygrid <- cbind(rep(ygrid, each = nx), rep(xgrid, times = ny))

# Read Kaplan SST data ----
# Remote alternative kept from the original script:
# data <- readBin("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/files-HW2/Kaplan-SST-Jan1900-Sep2011.r4",
#                 what = "numeric", n = (nrows * ncols * ntime), size = 4, endian = "swap")
data <- readBin("Kaplan-SST-Jan1900-Sep2011.r4",
  what = "numeric",
  n = (nrows * ncols * ntime), size = 4, endian = "swap"
)
# readBin() returns fewer values without complaint when the file is short,
# and array() would then recycle them silently; fail fast instead.
stopifnot(length(data) == nrows * ncols * ntime)
data <- array(data = data, dim = c(nrows, ncols, ntime))
data1 <- data[, , 1]

# Valid-cell mask on the lat - long grid ----
index <- seq_len(nx * ny)
# Cells with a value below 20 are treated as non-missing (per the original
# comment; presumably missing cells carry a large fill value - confirm).
index1 <- index[data1 < 20]
xygrid1 <- xygrid[index1, ]
x1 <- xygrid1[, 2]
# x1[x1 < 0] <- x1[x1 < 0] + 360
# xygrid1[, 2] <- x1
nsites <- length(index1)
data2 <- data1[index1]

# SST data matrix: rows are months and columns are locations ----
sstdata <- matrix(NA, nrow = ntimep, ncol = nsites)
for (i in seq_len(ntimep)) {
  data1 <- data[, , i]
  index1 <- index[data1 < 20]
  # The mask is recomputed every month; it must select the same number of
  # cells as the first month or the row assignment would misalign/recycle.
  stopifnot(length(index1) == nsites)
  data2 <- data1[index1]
  sstdata[i, ] <- data2
}
indexgrid <- index1 # mask of the last processed month, used for mapping
rm("data") # remove the object data to clear up space

# Annual average data ----
# Reshape so that every column holds the 12 months of one (year, site) pair,
# average the columns, then fold back to a years x sites matrix.
nyrs1 <- ntimep / 12
sstanavg <- matrix(colMeans(matrix(sstdata, nrow = 12)), nrow = nyrs1)

# Write out the grid locations (one "lat long" pair per line) ----
write(t(xygrid1), file = "kaplan-sst-locs.txt", ncolumns = 2)

# PCA ----
# You can do the same with the monthly SST data `sstdata`.
# Get the variance (covariance) matrix between sites.
zs <- var(sstanavg)
# Do an eigen decomposition..
# PCA of the annual SST field ----
# Uses `zs` (site covariance matrix), `sstanavg` (years x sites), `xygrid`,
# `indexgrid`, `nglobe`, `nrows` and `ncols` created earlier in the script.

library(maps)
library(akima)
library(fields)

# SVD of the covariance matrix; the columns of zsvd$u are used as the
# eigenvectors (spatial patterns) and zsvd$d as the eigenvalues.
zsvd <- svd(zs)

# Principal components: project the annual data onto the eigenvectors
# (same product as t(t(u) %*% t(sstanavg)), written directly).
pcs <- sstanavg %*% zsvd$u

# Eigenvalues expressed as fraction of total variance.
lambdas <- zsvd$d / sum(zsvd$d)

# Scree plot of the leading modes (at most 40, fewer if fewer exist).
nshow <- min(40, length(lambdas))
plot(seq_len(nshow), lambdas[seq_len(nshow)],
  type = "l",
  xlab = "Modes", ylab = "Frac. Var. explained"
)
points(seq_len(nshow), lambdas[seq_len(nshow)], col = "red")

# Plot the first spatial component (eigenvector pattern) ----
# The data is on a grid, so fill the entire global grid with NaN and then
# populate the retained grid cells with the eigenvector.
xlong <- sort(unique(xygrid[, 2]))
ylat <- sort(unique(xygrid[, 1]))
zfull <- rep(NaN, nglobe) # also equal to 72 * 36
zfull[indexgrid] <- zsvd$u[, 1]
zmat <- matrix(zfull, nrow = nrows, ncol = ncols)

image.plot(xlong, ylat, zmat, ylim = c(-40, 70))
contour(xlong, ylat, zmat,
  ylim = c(-40, 70), add = TRUE, nlevels = 6, lwd = 2
)
map("world2", add = TRUE)
# world(add = TRUE, shift = TRUE)

# Similarly plot the other two eigenvectors.

# Removing a component from the data (here the first one) ----
nmodes <- ncol(zsvd$u) # number of modes
# Modes selected for reconstruction in `sstanavgkeep`; these are the modes
# that end up removed from `sstanrem`. For several modes use e.g. c(1, 2, 3).
nkeep <- c(1)

# E holds only the selected eigenvectors; all other columns stay zero, so
# pcs %*% t(E) rebuilds the data from the selected modes alone.
E <- matrix(0, nrow = nmodes, ncol = nmodes)
E[, nkeep] <- zsvd$u[, nkeep]
sstanavgkeep <- pcs %*% t(E)

# Data with the selected modes subtracted.
sstanrem <- sstanavg - sstanavgkeep

# Now perform PCA on sstanrem