# Logistic Regression # Categorical flow probabilities are estimated for different exceedance probabilities of flow rm(list=ls()) test=matrix(scan("Avg-SpringFlows-April1SWE-49-04.txt", skip = 1), ncol=2, byrow=T) avgflows=test[,1] #Spring Flows X=test[,2] #April 1 SWE swe = X Y = avgflows #Choose your flow threshold.. here we choose the 75th percentile.. flowthresh=quantile(avgflows,c(0.75)) N = length(Y) index = 1:N positions=1:N # PART - I Fit the model.. # Create a binary data of Y. Y > flowthres it is 1 and 0 otherwise binaryts=1:N # Prepare a binary time series binaryts[avgflows > flowthresh]=1 binaryts[avgflows <= flowthresh]=0 # fit the Logistic model logcalib=glm(binaryts~swe,family="binomial") # Estimate at the data locations logpred=predict.glm(logcalib,data.frame(swe=swe),type="response") plot(binaryts,logpred,xlab="Actual",ylab="Estimated",cex=1.5) # PART - II X-validated estimates.. # Drop a data point, fit the model and predict the dropped point # Repeat for all the observations. probxval=1:N for( ypos in 1:N){ # Drop the point ypos positions1=positions[index!=ypos] # Grab all the other points binaryflowts=binaryts[positions1] swecalib=swe[positions1] # data point to predict - i.e., the dropped point swepred=swe[ypos] # Fit the model on rest of the data. logcalib=glm(binaryflowts~swecalib,family="binomial") #predict on the dropped point.. logpred=predict.glm(logcalib,data.frame(swecalib=swepred),type="response") probxval[ypos]=logpred } #Loop ends for each cross validated year points(binaryts,probxval,pch=3,col="blue") legend(0.7,0.9,c("Fitted","X-validated"),pch=c(1,3),col=c("black","blue"),bty="n") logcalib=glm(binaryts~swe,family="binomial") ypred = predict(logcalib,type="response") ## you can set ypred = probxval to test the skill of cross validated estimates acc1=binaryts N=length(acc1) p1=length(acc1[acc1 == 0])/N p2 = 1-p1 climo <- c(p1,p2) rps1 <- rpsc <- 0 obs <- cbind(1-acc1,acc1) pred = cbind(1-ypred,ypred) for(i in 1:N){ # Prediction model score rps1 <- rps1 + sum((cumsum(obs[i,])-cumsum(pred[i,]))^2) # Null model (climatology) score rpsc <- rpsc + sum((cumsum(obs[i,])-cumsum(climo))^2) } RPSS <- 1 - (rps1/rpsc) ### using verification package.. library(verification) acctemp=acc1+1 # Add 1 to get categories 1 and 2 rps(acctemp, pred, baseline=climo )$rpss