# This code is to model a lag-1 Modified Nonparametric K-nearest neighbor # model for a monthly streamflow time series. test=matrix(scan("LeesFerry-monflows-1906-2006.txt"),ncol=13,byrow=T) WYmonflow=test[,2:13] WYmonflow = WYmonflow/10^6 #WYmonflow = matrix(scan("0725RebuiltNatFlow.txt"),ncol=12,byrow=T) #WYmonflow = matrix(scan("leesferry0695cy.txt"),ncol = 12,byrow = T) #converts ac-ft/month to cms #WYmonflow = WYmonflow*.0004690502 nsim = 250 # number of simulations ytype = "CY" #ytype = "WY" nsim1 = nsim+1 library(locfit) #initialize the matrices that store the statistics # the first row contains the statistic of the historical #data. armean = matrix(0,nsim1,13) arsd = matrix(0,nsim1,13) arcor = matrix(0,nsim1,13) arskw = matrix(0,nsim1,13) armax = matrix(0,nsim1,13) armin = matrix(0,nsim1,13) nyrs = length(WYmonflow[,1]) #number of years of data for each month nyrs1 = nyrs-1 nmons = nyrs*12 #number of values to be generated #nyrs is the number of years and 12 is the number of #months. febsim = matrix(0,ncol=nsim,nrow=nyrs) aprsim = matrix(0,ncol=nsim,nrow=nyrs) junsim = matrix(0,ncol=nsim,nrow=nyrs) augsim = matrix(0,ncol=nsim,nrow=nyrs) octsim = matrix(0,ncol=nsim,nrow=nyrs) maysim = matrix(0,ncol=nsim,nrow=nyrs) decsim = matrix(0,ncol=nsim,nrow=nyrs) mxsp = 1:nsim1 mxdef = 1:nsim1 maxs = 1:nsim1 maxd = 1:nsim1 KNNflows = matrix(0,nmons,nsim) annflow = 1:nyrs for(i in 1:nyrs)annflow[i] = sum(WYmonflow[i,1:12]) x = annflow #fit lowess and get the residuals series.. resids = matrix(0,nrow=nyrs1,ncol=12) alpha = seq(0.2,0.8,by=0.05) alpha1 = seq(0.2,0.8,by=0.05) alpha2 = c(alpha,alpha1) for(i in 1:12){ i1 = i-1 if(i == 1){ zz = gcvplot(WYmonflow[2:nyrs,1]~WYmonflow[1:nyrs1,12],alpha=alpha,deg=1, ev="cross") #z1 = gcvplot(WYmonflow[2:nyrs,1]~WYmonflow[1:nyrs1,12],alpha=alpha,deg=2) #z2 = order(c(zz$values,z1$values)) z2 = order(zz$values) deg1 = 1 #if(z2[1] > 13)deg1 = 2 zz = locfit(WYmonflow[2:nyrs,1]~WYmonflow[1:nyrs1,12],alpha=alpha2[z2[1]],deg=deg1) x1l = zz resids[,i] = residuals(zz) } else{ zz = gcvplot(WYmonflow[1:nyrs1,i]~WYmonflow[1:nyrs1,i1],alpha=alpha,deg=1,ev="cross") #z1 = gcvplot(WYmonflow[1:nyrs1,i]~WYmonflow[1:nyrs1,i1],alpha=alpha,deg=2) #z2 = order(c(zz$values,z1$values)) z2 = order(zz$values) deg1 = 1 #if(z2[1] > 13)deg1 = 2 zz = locfit(WYmonflow[1:nyrs1,i]~WYmonflow[1:nyrs1,i1],alpha=alpha2[z2[1]],deg=deg1) if(i == 2)x2l = zz if(i == 3)x3l = zz if(i == 4)x4l = zz if(i == 5)x5l = zz if(i == 6)x6l = zz if(i == 7)x7l = zz if(i == 8)x8l = zz if(i == 9)x9l = zz if(i == 10)x10l = zz if(i == 11)x11l = zz if(i == 12)x12l = zz resids[,i] = residuals(zz) } } #get the weight metric. Here we are using 1/k weight function kk = sqrt(nyrs) #number nearest neighbors 'K' kk = round(kk) W = 1:kk W = 1/W W = W/sum(W) W = cumsum(W) for(k in 1:nsim){ xsim = 1:nmons #for the first month of the first year - pick a value at random #from one of the nyrs years. i = round(runif(1,1,nyrs)) #SHOULD THIS BE NYRS xsim[1] = WYmonflow[i,1] xprev = xsim[1] for(i in 2:nmons){ j = i %% 12 if(j == 0)j = 12 j1 = j-1 #get the distance if(j == 1){xx = abs(xprev-WYmonflow[1:nyrs1,12])} else{xx = abs(xprev-WYmonflow[1:nyrs1,j1])} #order the distance xz = order(xx) #select the first K neighbors xz = xz[1:kk] #pick a neighbor out of the K neighbros based on the weight #metric W xx = runif(1,0,1) xy = c(xx,W) xx = rank(xy) i1 = xz[xx[1]] if(j == 1)xm = predict(x1l,xprev) if(j == 2)xm = predict(x2l,xprev) if(j == 3)xm = predict(x3l,xprev) if(j == 4)xm = predict(x4l,xprev) if(j == 5)xm = predict(x5l,xprev) if(j == 6)xm = predict(x6l,xprev) if(j == 7)xm = predict(x7l,xprev) if(j == 8)xm = predict(x8l,xprev) if(j == 9)xm = predict(x9l,xprev) if(j == 10)xm = predict(x10l,xprev) if(j == 11)xm = predict(x11l,xprev) if(j == 12)xm = predict(x12l,xprev) xsim[i] = xm + resids[i1,j] #print(c(i,j,xprev,xm,resids[i1,j])) xprev = xsim[i] } simdismon = matrix(xsim,nrow=12) simdismon = t(simdismon) KNNflows[,k] = xsim if(ytype=="WY"){ junsim[1:nyrs,k] = simdismon[,9] febsim[1:nyrs,k] = simdismon[,5] aprsim[1:nyrs,k] = simdismon[,7] augsim[1:nyrs,k] = simdismon[,11] octsim[1:nyrs,k] = simdismon[,1] maysim[1:nyrs,k] = simdismon[,8] decsim[1:nyrs,k] = simdismon[,3] } if(ytype=="CY"){ junsim[1:nyrs,k] = simdismon[,6] febsim[1:nyrs,k] = simdismon[,2] aprsim[1:nyrs,k] = simdismon[,4] augsim[1:nyrs,k] = simdismon[,8] octsim[1:nyrs,k] = simdismon[,10] maysim[1:nyrs,k] = simdismon[,5] decsim[1:nyrs,k] = simdismon[,12] } #***calculate basic statistics**************** k1 = k+1 for(j in 1:12){ armean[k1,j] = mean(simdismon[,j]) armax[k1,j] = max(simdismon[,j]) armin[k1,j] = min(simdismon[,j]) arsd[k1,j] = sd(simdismon[,j]) arskw[k1,j] = skew(simdismon[,j]) #arskw[k1,j] = sum((simdismon[,j]-armean[k1,j])^3) #arskw[k1,j] = arskw[k1,j]/nyrs #arskw[k1,j] = arskw[k1,j]/arsd[k1,j]^3 } for(j in 2:12) { j1 = j-1 arcor[k1,j] = cor(simdismon[,j],simdismon[,j1]) } arcor[k1,1] = cor(simdismon[1:nyrs1,12],simdismon[2:nyrs,1]) arann = 1:nyrs for(i in 1:nyrs){ arann[i] = (sum(simdismon[i,]))/12 } armean[k1,13] = mean(arann) arsd[k1,13] = sd(arann) armax[k1,13] = max(arann) armin[k1,13] = min(arann) arskw[k1,13] = skew(arann) #arskw[k1,13] = sum((arann-armean[k1,13])^3) #arskw[k1,13] = arskw[k1,13]/nyrs #arskw[k1,13] = arskw[k1,13]/arsd[k1,13]^3 arcor[k1,13] = cor(arann[1:nyrs1],arann[2:nyrs]) # mon diffs.. #xx = array(t(simdismon[1:nyrs,1:12])) #xy = diff(xx) #mondiffs[k1,] = xy #drought statistics #Set the threshold level frac = 1 th = frac*mean(array(t(WYmonflow[1:nyrs,1:12]))) #th = quantile(array(t(WYmonflow[1:nyrs,1:12])),0.5) #drought statistics for simulation A = array(t(simdismon)) xx = A Y = A xx[xx <= th] = 0 xx[xx > th] = 1 n = length(xx) x1 = xx if(xx[1]==1)x1 = c(0,x1) if(xx[1]==1)Y = c(0,Y) if(xx[n]==1)x1 = c(x1,0) if(xx[n]==1)Y = c(Y,0) n = length(x1[x1==1]) xy = order(x1)[1:(length(x1)-n)] y3 = xy x3 = diff(xy)-1 x3 = x3[x3>0] mxsp[k1] = max(x3) #Longest Surplus LS #Maximum surplus nsp = length(x3) surp = 1:nsp nsp = length(y3) #print(c(k1,mxsp[k1],mxdef[k1],i1,i2,nsp)) isp = 0 for(i in 1:(nsp-1)){ i1 = y3[i]+1 i2 = y3[i+1]-1 if(i2 >= i1){isp = isp+1 surp[isp] = sum(Y[i1:i2]) } } #print(c(k1,isp,nsp,length(x3))) maxs[k1] = max(surp) #Maximum Surplus MS # for longest drought xx = A Y = A xx[xx <= th] = 1 xx[xx > th] = 0 n = length(xx) x1 = xx if(xx[1]==1)x1 = c(0,x1) if(xx[1]==1)Y = c(0,Y) if(xx[n]==1)x1 = c(x1,0) if(xx[n]==1)Y = c(Y,0) n = length(x1[x1==1]) xy = order(x1)[1:(length(x1)-n)] y3 = xy x2 = diff(xy)-1 x2 = x2[x2>0] mxdef[k1] = max(x2) #Longest Drought LD #Maximum Deficit nsp = length(x2) surp = 1:nsp nsp = length(y3) isp = 0 for(i in 1:(nsp-1)){ i1 = y3[i]+1 i2 = y3[i+1]-1 if(i2 >= i1){isp = isp+1 surp[isp] = sum(Y[i1:i2]) } } #print(c(k1,isp,nsp,length(x3))) maxd[k1] = max(surp) #Maximium Surplus MD } #********************************************* #These below do not need to be in k loop #************************* #Basic observed data obsmean = 1:13 obssd = 1:13 obscor = 1:13 obsskw = 1:13 obsmax = 1:13 obsmin = 1:13 for(i in 1:12){ obsmax[i] = max(WYmonflow[,i]) obsmin[i] = min(WYmonflow[,i]) obsmean[i] = mean(WYmonflow[,i]) obssd[i] = sd(WYmonflow[,i]) obsskw[i] = skew(WYmonflow[,i]) #obsskw[i] = sum((WYmonflow[,i]-obsmean[i])^3) #obsskw[i] = obsskw[i]/nyrs #obsskw[i] = obsskw[i]/obssd[i]^3 } for(i in 2:12){ i1 = i-1 obscor[i] = cor(WYmonflow[,i], WYmonflow[,i1]) } obscor[1] = cor(WYmonflow[1:nyrs1,12], WYmonflow[2:nyrs,1]) obsann = 1:nyrs for(i in 1:nyrs){ obsann[i] = (sum(WYmonflow[i,]))/12 } obsmax[13] = max(obsann) obsmin[13] = min(obsann) obsmean[13] = mean(obsann) obssd[13] = sd(obsann) obsskw[13] = skew(obsann) #obsskw[13] = sum((obsann-obsmean[13])^3) #obsskw[13] = obsskw[13]/nyrs #obsskw[13] = obsskw[13]/obssd[13]^3 obscor[13] = cor(obsann[1:nyrs1],obsann[2:nyrs]) armean[1,1:13] = obsmean[1:13] arsd[1,1:13] = obssd[1:13] arskw[1,1:13] = obsskw[1:13] arcor[1,1:13] = obscor[1:13] armax[1,1:13] = obsmax[1:13] armin[1,1:13] = obsmin[1:13] # observed statistics #xx = array(t(WYmonflow[1:nyrs,1:12])) #xy = diff(xx) #mondiffs[1,] = xy #Observed data drought statistics A = array(t(WYmonflow[1:nyrs,1:12])) xx = A Y = A xx[xx <= th] = 0 xx[xx > th] = 1 n = length(xx) x1 = xx if(xx[1]==1)x1 = c(0,x1) if(xx[1]==1)Y = c(0,Y) if(xx[n]==1)x1 = c(x1,0) if(xx[n]==1)Y = c(Y,0) n = length(x1[x1==1]) xy = order(x1)[1:(length(x1)-n)] y3 = xy x3 = diff(xy)-1 x3 = x3[x3>0] mxsp[1] = max(x3) #Longest Surplus LS #Maximum Surplus nsp = length(x3) surp = 1:nsp nsp = length(y3) isp = 0 for(i in 1:(nsp-1)){ i1 = y3[i]+1 i2 = y3[i+1]-1 if(i2 >= i1){isp = isp+1 surp[isp] = sum(Y[i1:i2]) } } maxs[1] = max(surp) # MS # for longest drought xx = A Y = A xx[xx <= th] = 1 xx[xx > th] = 0 n = length(xx) x1 = xx if(xx[1]==1)x1 = c(0,x1) if(xx[1]==1)Y = c(0,Y) if(xx[n]==1)x1 = c(x1,0) if(xx[n]==1)Y = c(Y,0) n = length(x1[x1==1]) xy = order(x1)[1:(length(x1)-n)] y3 = xy x2 = diff(xy)-1 x2 = x2[x2>0] mxdef[1] = max(x2) #Longest Drought LD #Maximum deficit nsp = length(x2) surp = 1:nsp nsp = length(y3) isp = 0 for(i in 1:(nsp-1)){ i1 = y3[i]+1 i2 = y3[i+1]-1 if(i2 >= i1){isp = isp+1 surp[isp] = sum(Y[i1:i2]) } } maxd[1] = max(surp) # MD #write the stats in separate files.. write(t(arcor),file="lfknncorrs1",ncol=13) write(t(armean),file="lfknnmeans",ncol=13) write(t(arsd),file="lfknnsds",ncol=13) write(t(arskw),file="lfknnskews",ncol=13) write(t(armax),file="lfknnmax",ncol=13) write(t(armin),file="lfknnmin",ncol=13) #write(t(mondiffs),file="lfknnmondiffs",ncol=???) droutstat = cbind(mxsp,mxdef,maxs,maxd) write(t(febsim),file="lfknnfebsim",ncol=nsim) write(t(aprsim),file="lfknnaprsim",ncol=nsim) write(t(junsim),file="lfknnjunsim",ncol=nsim) write(t(augsim),file="lfknnaugsim",ncol=nsim) write(t(octsim),file="lfknnoctsim",ncol=nsim) write(t(maysim),file="lfknnmaysim",ncol=nsim) write(t(decsim),file="lfknndecsim",ncol=nsim) write(t(droutstat),file="lfknn-drought-stats",ncol=4) write((KNNflows),file="LFKNNflows.txt",ncol=12)