#	Logistic Regression
#	Categorical flow probabilities are estimated for different exceedance probabilities of flow 
	
	rm(list=ls())

test=matrix(scan("Avg-SpringFlows-April1SWE-49-04.txt", skip = 1), ncol=2, byrow=T)

avgflows=test[,1]              #Spring Flows
X=test[,2]              #April 1 SWE
swe = X

Y = avgflows

#Choose your flow threshold.. here we choose the 75th percentile..
	flowthresh=quantile(avgflows,c(0.75))

	N = length(Y)

	index = 1:N
	positions=1:N
	
#	PART - I Fit the model..
# Create a binary data of Y. Y > flowthres it is 1 and 0 otherwise

	binaryts=1:N	# Prepare a binary time series
	binaryts[avgflows > flowthresh]=1
	binaryts[avgflows <= flowthresh]=0

# fit the Logistic model
	logcalib=glm(binaryts~swe,family="binomial")

# Estimate at the data locations
	logpred=predict.glm(logcalib,data.frame(swe=swe),type="response")
	plot(binaryts,logpred,xlab="Actual",ylab="Estimated",cex=1.5)


#	PART - II X-validated estimates..
# Drop a data point, fit the model and predict the dropped point
# Repeat for all the observations.

		probxval=1:N

        	for( ypos in 1:N){
	
# Drop the point ypos	
			positions1=positions[index!=ypos]


# Grab all the other points
			binaryflowts=binaryts[positions1]
			swecalib=swe[positions1]

# data point to predict - i.e., the dropped point
			swepred=swe[ypos]

# Fit the model on rest of the data.
			logcalib=glm(binaryflowts~swecalib,family="binomial")

#predict on the dropped point..
		        logpred=predict.glm(logcalib,data.frame(swecalib=swepred),type="response")
			probxval[ypos]=logpred
			}	#Loop ends for each cross validated year

		points(binaryts,probxval,pch=3,col="blue")
		legend(0.7,0.9,c("Fitted","X-validated"),pch=c(1,3),col=c("black","blue"),bty="n")


logcalib=glm(binaryts~swe,family="binomial")
ypred = predict(logcalib,type="response")


## you can set ypred = probxval   to test the skill of cross validated estimates
acc1=binaryts
N=length(acc1)
p1=length(acc1[acc1 == 0])/N
p2 = 1-p1

climo <- c(p1,p2)
rps1 <- rpsc <- 0
obs <- cbind(1-acc1,acc1)
pred = cbind(1-ypred,ypred)

for(i in 1:N){
   # Prediction model score
   rps1 <- rps1 + sum((cumsum(obs[i,])-cumsum(pred[i,]))^2)
   # Null model (climatology) score
   rpsc <- rpsc + sum((cumsum(obs[i,])-cumsum(climo))^2)
}

RPSS <- 1 - (rps1/rpsc)

### using verification package..
library(verification)
acctemp=acc1+1 # Add 1 to get categories 1 and 2
rps(acctemp, pred, baseline=climo )$rpss