### Read in Nov-Mar SST average data
### Read in the streamflow at 4 locations on Colorado River
### Compute the spring season average flow (water-year months 7-10, i.e.
###   Apr - Jul of an Oct-start year; an earlier header said Apr - Jun)
### Perform PCA on the SST field and on the spring season flow

nrows <- 72
ncols <- 36
ntime <- 101   # Nov-Mar 1906 - Nov-Mar 2006
ntimep <- 101  # Nov-Mar 1906 - Nov-Mar 2006
N <- nrows * ncols

### Lat - Long grid..
ygrid <- seq(-87.5, 87.5, by = 5)
ny <- length(ygrid)
xgrid <- seq(27.5, 382.5, by = 5)
# Uncomment to shift longitudes above 180 onto a -180..180 grid if needed:
# xgrid[xgrid > 180] <- xgrid[xgrid > 180] - 360
nx <- length(xgrid)

# One row per grid cell: column 1 = latitude, column 2 = longitude.
# Longitude varies fastest, which matches the column-major layout of each
# nrows x ncols time slice read below.
xygrid <- cbind(rep(ygrid, each = nx), rep(xgrid, times = ny))

# Read Kaplan SST data..
nvalues <- nrows * ncols * ntime
data <- readBin("Kaplan-SST-NDJFM1906-NDJFM2006.r4", what = "numeric",
                n = nvalues, size = 4, endian = "swap")
if (length(data) != nvalues) {
  # array() would silently recycle a short read, so fail loudly instead.
  stop("Expected ", nvalues, " values in the SST file but read ",
       length(data), call. = FALSE)
}
data <- array(data = data, dim = c(nrows, ncols, ntime))

data1 <- data[, , 1]

# Missing value is NaN, put it to a large number..
data1[is.nan(data1)] <- 1e+30

# the lat-long data grid..
index <- 1:(nx * ny)
index1 <- index[data1 < 20]  # only non-missing data (values below 20).
xygrid1 <- xygrid[index1, , drop = FALSE]
x1 <- xygrid1[, 2]
# x1[x1 < 0] <- x1[x1 < 0] + 360
# xygrid1[, 2] <- x1

nsites <- length(index1)  # locations with data - i.e. global locations

### SST data matrix - rows are years and columns are locations on the globe
### with data. Every year must use the same set of locations (the one that
### defines xygrid1); otherwise the columns would silently stop lining up
### with the grid coordinates.
sstmat <- matrix(as.vector(data), nrow = nrows * ncols, ncol = ntime)
sstmat <- sstmat[, seq_len(ntimep), drop = FALSE]
valid <- !is.nan(sstmat) & sstmat < 20
if (any(valid != valid[, 1])) {
  stop("The set of non-missing SST grid cells changes between years; ",
       "columns of sstdata would not match xygrid1", call. = FALSE)
}
sstdata <- t(sstmat[index1, , drop = FALSE])

rm("data", "sstmat", "valid")  # remove the large objects to clear up space

###################### PCA on SST
###

# get variance matrix..
zs <- var(sstdata)

# do an Eigen decomposition..
zsvd <- svd(zs)

# Principal Components...
# Same product as t(t(U) %*% t(X)); note the projection uses the SST values
# as read (var() centres internally, the projection here does not).
pcs <- sstdata %*% zsvd$u

# Eigen Values.. - fraction variance
lambdas <- zsvd$d / sum(zsvd$d)

plot(1:40, lambdas[1:40], type = "l", xlab = "Modes",
     ylab = "Frac. Var. \nexplained")
points(1:40, lambdas[1:40], col = "red")

###### Streamflow
# Read in flows Oct 1905 - Sep 2006 (monthly values, one column per location)
test <- matrix(scan("ColoradoRiver-4locs-Oct1905-Sep2006.txt"),
               ncol = 4, byrow = TRUE)
if (nrow(test) %% 12 != 0) {
  # A partial year would shift every later month into the wrong column.
  stop("Flow record has ", nrow(test),
       " monthly rows, which is not a whole number of years", call. = FALSE)
}

# Columns of the year x month matrix start in October, so 7:10 is Apr - Jul.
spring_months <- 7:10
nyears <- nrow(test) / 12
sprflow <- matrix(NA_real_, nrow = nyears, ncol = ncol(test))
for (i in seq_len(ncol(test))) {
  xx <- matrix(test[, i], ncol = 12, byrow = TRUE)
  # average spring season flow for each year
  sprflow[, i] <- rowMeans(xx[, spring_months, drop = FALSE])
}

sprflow <- sprflow / 10^6  ## Million Acre-feet

### perform PCA on sprflow
# NOTE: zs, zsvd, pcs and lambdas are reused below, so the SST PCA results
# computed above are overwritten from here on.

# scale the data, then get variance matrix..
sprflow1 <- scale(sprflow)
zs <- var(sprflow1)

# do an Eigen decomposition..
zsvd <- svd(zs)

# Principal Components...
pcs <- sprflow1 %*% zsvd$u

# Eigen Values.. - fraction variance
lambdas <- zsvd$d / sum(zsvd$d)

plot(1:4, lambdas[1:4], type = "l", xlab = "Modes",
     ylab = "Frac. Var. explained")
points(1:4, lambdas[1:4], col = "red")

### First two PCs explain 94% of the variance!
### Predict PC1 and PC2 separately. Set up a GLM or locfit type model for this
### and resample the other two PCs