### Read in Nov-Mar SST average data
### Read in the streamflow at 4 locations on the Colorado River
### Compute the spring average flow (Apr - Jul, i.e. months 7:10 of the Oct-Sep water year)
### Perform PCA on the SST data and on the spring season flow


# Dimensions of the gridded SST record.
# The binary file is reshaped to an nrows x ncols x ntime array further down.
nrows <- 72     # size of the first array dimension
ncols <- 36     # size of the second array dimension
ntime <- 101    # seasons stored in the file: Nov-Mar 1906 - Nov-Mar 2006
ntimep <- 101   # seasons used to build the SST matrix: Nov-Mar 1906 - Nov-Mar 2006
N <- ncols * nrows   # number of grid cells in one season


### Lat - Long grid..

# Latitudes of the grid cells, 5-degree spacing.
ygrid <- seq(-87.5, 87.5, by = 5)
ny <- length(ygrid)

# Longitudes of the grid cells, 5-degree spacing, left on the 27.5 .. 382.5
# axis (values above 180 are NOT wrapped).
# To wrap them, subtract 360 from the values above 180:
#   xgrid[xgrid > 180] <- xgrid[xgrid > 180] - 360
xgrid <- seq(27.5, 382.5, by = 5)
nx <- length(xgrid)

# One row per grid cell: column 1 = latitude, column 2 = longitude.
# Longitude varies fastest, i.e. the first nx rows all share ygrid[1].
xygrid <- matrix(0, nrow = nx * ny, ncol = 2)
xygrid[, 1] <- rep(ygrid, each = nx)
xygrid[, 2] <- rep(xgrid, times = ny)

# Read Kaplan SST data..
#
# The file is read as a flat stream of 4-byte floating point values,
# nrows * ncols values per season for ntime seasons.
# NOTE(review): endian="swap" means "opposite of this machine's byte order",
# so the result depends on where the script runs; an explicit "big" or
# "little" would be portable - confirm the byte order of the file.
data <- readBin("Kaplan-SST-NDJFM1906-NDJFM2006.r4", what = "numeric",
                n = nrows * ncols * ntime, size = 4, endian = "swap")

# readBin() returns fewer values than requested, without any error, when the
# file is shorter than expected; array() would then silently recycle them.
if (length(data) != nrows * ncols * ntime) {
  stop("Kaplan SST file holds ", length(data), " values, expected ",
       nrows * ncols * ntime, call. = FALSE)
}

# Reshape to (first grid dimension, second grid dimension, season).
data <- array(data = data, dim = c(nrows, ncols, ntime))


# Use the first season to locate the grid cells that carry data.
data1 <- data[, , 1]
# Missing value is NaN, put it to a large number..
# (is.nan() tests the values directly instead of comparing against the
# string "NaN", which only worked through numeric-to-character coercion)
data1[is.nan(data1)] <- 1e+30

# the lat -long data grid..

# Linear cell index, in the same order as the rows of xygrid.
index <- seq_len(nx * ny)

# only non-missing data: the 1e+30 sentinel fails the "< 20" test.
# which() keeps any remaining NA out of the index.
index1 <- index[which(data1 < 20)]
# drop = FALSE keeps xygrid1 a matrix even if only one cell has data.
xygrid1 <- xygrid[index1, , drop = FALSE]
x1 <- xygrid1[, 2]   # longitudes of the cells with data

#x1[x1 < 0]= x1[x1 < 0] + 360
#xygrid1[,2]=x1

nsites <- length(index1)	# locations with data -i.e. global locations
data2 <- data1[index1]      # first-season values at those locations

### SSTdata matrix - rows are years and columns are locations on the globe with data
sstdata <- matrix(NA_real_, nrow = ntimep, ncol = nsites)

# Fill one row per season. The set of valid cells is recomputed for every
# season and must have the same size as in the first season (nsites).
for (i in seq_len(ntimep)) {
  data1 <- data[, , i]
  # Missing value is NaN, put it to a large number so that it fails "< 20".
  data1[is.nan(data1)] <- 1e+30
  index1 <- index[which(data1 < 20)]
  data2 <- data1[index1]
  # Without this check a season with a different number of valid cells
  # would either fail with an obscure message or be recycled silently.
  if (length(data2) != nsites) {
    stop("season ", i, " has ", length(data2),
         " cells with data, expected ", nsites, call. = FALSE)
  }
  sstdata[i, ] <- data2
}

rm("data")	#remove the object data to clear up space


###################### PCA   ###

# Variance-covariance matrix between the SST locations (nsites x nsites).
zs <- var(sstdata)

# Eigen decomposition of the symmetric covariance matrix via SVD:
# zsvd$u holds the eigenvectors, zsvd$d the eigenvalues.
zsvd <- svd(zs)

# Principal Components: project the SST matrix onto the eigenvectors
# (same product as t(t(zsvd$u) %*% t(sstdata)), without the transposes).
# NOTE(review): var() centres each column but sstdata itself is projected
# uncentred, so every PC carries a constant offset - confirm this is intended.
pcs <- sstdata %*% zsvd$u

# Eigen Values.. - fraction variance
lambdas <- zsvd$d / sum(zsvd$d)

# zs, zsvd, pcs and lambdas are reused by the streamflow PCA later in this
# script; keep SST-specific copies so the SST results stay available.
sstpcs <- pcs
sstlambdas <- lambdas

# Eigenvalue spectrum of the first 40 modes.
plot(1:40, lambdas[1:40], type = "l", xlab = "Modes", ylab = "Frac. Var. explained")
points(1:40, lambdas[1:40], col = "red")


######
# Read in flows Oct 1905 - Sep 2006

# One row per month, one column per location.
test <- matrix(scan("ColoradoRiver-4locs-Oct1905-Sep2006.txt"), ncol = 4, byrow = TRUE)

# The monthly series is folded into 12-column years below; a partial year
# would make matrix() recycle values instead of failing.
if (nrow(test) %% 12 != 0) {
  stop("flow file holds ", nrow(test),
       " monthly rows, which is not a whole number of years", call. = FALSE)
}

# Seasonal mean flow: one row per year, one column per location
# (no column names; columns follow the order of the locations in the file).
sprflow <- matrix(NA_real_, nrow = nrow(test) / 12, ncol = 4)
for (i in 1:4) {
  # rows = years, columns = months in file order (column 1 = first month)
  xx <- matrix(test[, i], ncol = 12, byrow = TRUE)
  # average spring season flow: columns 7:10, i.e. Apr, May, Jun, Jul when
  # the record starts in October as the file name indicates.
  # NOTE(review): use 7:9 instead if a strict Apr-Jun season is wanted.
  sprflow[, i] <- rowMeans(xx[, 7:10, drop = FALSE])
}

sprflow <- sprflow / 10^6   ## Million Acre-feet (presumably input is in acre-feet)

### perform PCA on sprflow

# Standardise every location (zero mean, unit variance) ...
sprflow1 <- scale(sprflow)
# ... and take the variance matrix of the scaled flows.
zs <- var(sprflow1)

# Eigen decomposition through SVD: u = eigenvectors, d = eigenvalues.
zsvd <- svd(zs)

# Principal Components: scaled flows projected onto the eigenvectors.
pcs <- sprflow1 %*% zsvd$u

# Eigen Values, expressed as fraction of the total variance.
lambdas <- zsvd$d / sum(zsvd$d)

# Eigenvalue spectrum of the four modes.
plot(seq_len(4), lambdas[seq_len(4)], type = "l",
     xlab = "Modes", ylab = "Frac. Var. explained")
points(seq_len(4), lambdas[seq_len(4)], col = "red")

### First two PCs explain 94% of the variance!
### Predict PC1 and PC2 separately. Set up a GLM or locfit type model for this
### and resample the other two PCs