### Read in Nov-Mar SSt average data ### Read in the streamflow at 4 locations on Colorado River ### Compute the spring (Apr - Jun) average ### First Perform PCA on the winter SSTs #### Then perform CCA on the winter SST PCs and the flows at the four locations #### Regress them to do forecast. ### This uses 15 PCS of SST. You can experiment with fewer PCs say 6-9 ## Also you can see the effect of including SWE and not including SWE ### The PCA commands are from last homework. nrows=72 ncols=36 ntime = 58 #Nov-Mar 1906 - Nov-Mar 2006 ntimep = 58 #Nov-Mar 1906 - Nov-Mar 2006 N = nrows*ncols ### Lat - Long grid.. ygrid=seq(-87.5,87.5,by=5) ny=length(ygrid) xgrid=seq(27.5,382.5,by=5) #xgrid[xgrid > 180]=xgrid[xgrid > 180]-360 #longitude on 0-360 grid if needed xgrid[xgrid > 180]=xgrid[xgrid > 180] nx=length(xgrid) xygrid=matrix(0,nrow=nx*ny,ncol=2) i=0 for(iy in 1:ny){ for(ix in 1:nx){ i=i+1 xygrid[i,1]=ygrid[iy] xygrid[i,2]=xgrid[ix] } } # REad Kaplan SST data.. data=readBin("Kaplan-SST-NDJFM1949-NDJFM2006.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap") data <- array(data = data, dim=c( nrows, ncols, ntime ) ) data1=data[,,1] # Missing value is NaN, put it to a large number.. data1[data1 == "NaN"]=1e+30 # the lat -long data grid.. index=1:(nx*ny) index1=index[data1 < 20] # only non-missing data. xygrid1=xygrid[index1,] x1=xygrid1[,2] #x1[x1 < 0]= x1[x1 < 0] + 360 #xygrid1[,2]=x1 nsites=length(index1) # locations with data -i.e. global locations data2=data1[index1] ### SSTdata matrix - rows are years and columns are locations on the globe with data sstdata=matrix(NA,nrow=ntimep, ncol=nsites) for(i in 1:ntimep){ data1=data[,,i] data1[data1 == "NaN"]=1e+30 index1=index[data1 < 20] data2=data1[index1] sstdata[i,]=data2 } rm("data") #remove the object data to clear up space ###################### PCA on Global SST ### #get variance matrix.. zs=var(sstdata) #do an Eigen decomposition.. zsvd=svd(zs) #Principal Components... pcs=t(t(zsvd$u) %*% t(sstdata)) #Eigen Values.. - fraction variance lambdas=(zsvd$d/sum(zsvd$d)) plot(1:40, lambdas[1:40], type="l", xlab="Modes", ylab="Frac. Var. explained") points(1:40, lambdas[1:40], col="red") ###### #Read in flows Oct 1905 - Sep 2006 test=matrix(scan("ColoradoRiver-4locs-Oct1905-Sep2006.txt"),ncol=4,byrow=T) sprflow = c() for(i in 1:4){ xx=matrix(test[,i],ncol=12,byrow=T) x1=apply(xx[,7:10],1,mean) #average spring season flow sprflow = cbind(sprflow,x1) } sprflow = sprflow / 10^6 ## Million Acre-feet ### 1949 - -2006 sprflow = sprflow[44:101,] flowMean = apply(sprflow, 2, mean) flowSd = apply(sprflow, 2, sd) flowScale = t((t(sprflow) - flowMean)/ + flowSd) ######## Read SWE - 1949 - 2011. Year, JanSWE, FebSWE, MarSWE, AprSWE test = read.table("http://cadswes2.colorado.edu/~bracken/cadswes/data/swe_crb.txt",skip=1) swe = test[1:58,5] #Apr SWE 1949 - 2006 ######## Perform CCA on the first 15 PCs of SST with the four streamflows. sstPc = pcs[,1:15] X1 = scale(sstPc) #### You mighht want to include the SWE #X1 = scale(cbind(sstPc,swe)) M=dim(sstPc)[2] J=dim(flowScale)[2] J=min(M,J) N = length(flowScale[,1]) Qx1 = qr.Q(qr(X1)) Qy1 = qr.Q(qr(flowScale)) T11 = qr.R(qr(X1)) T22 = qr.R(qr(flowScale)) VV=t(Qx1) %*% Qy1 BB = svd(VV)$v AA = svd(VV)$u BB = solve(T22) %*% svd(VV)$v * sqrt(N-1) wm1 = flowScale %*% BB AA = solve(T11) %*% svd(VV)$u * sqrt(N-1) vm1 = X1 %*% AA cancorln = svd(VV)$d[1:J] #canonical correlation Fyy = var(flowScale) %*% BB #........................................ Prediction. betahat = solve(t(AA) %*% t(X1)%*% X1 %*% AA) %*% t(AA) %*% t(X1) %*% flowScale ypred=X1 %*% AA %*% betahat ## scale back the data ## mean forecast ypred=t(t(ypred)*flowSd + flowMean) #### ypred contains four columns the CCA model estimates and compare this with ###3 sprflow the observed spring flow.