#Fit a PDF to the data
# Make plots for visual comparison - Hist+PDF, quantile plot and probability
# plots.

library(MASS)

data=matrix(scan("aismr.txt"),ncol=13,byrow=T)
xdata=data[,2] ##Jan rainfall

#suppose the historical data is in the vector 'xdata'
N=length(xdata)		#number of data points.

#points at which to estimate the PDF from the simulations and the observed

xeval=seq(min(xdata)-sd(xdata),max(xdata)+sd(xdata),length=100)
neval=length(xeval)

#### Select a model (i.e., PDF) to fit to the data and 
# diagnostics and goodness of fit exercise - using visual and KS tests

### If you decide on Gamma or Weibull you have to fit it to the data
# to obtain the paramteres..

zgamma=fitdistr(xdata,"gamma")
zweibull=fitdistr(xdata,"weibull")

###########  Estimate the PDF of the selected model at athe evaluation points 
# based on the data.

#lognormal
xdensityorig=dlnorm(xeval,meanlog=mean(log(xdata)), sdlog=sd(log(xdata)))

#normal
#xdensityorig=dnorm(xeval,meanlog=mean(xdata), sdlog=sd(xdata))

#exponential
#xdensityorig=dexp(xeval,rate=1/mean(xdata))

# Gamma
xdensityorig=dgamma(xeval,shape=zgamma$estimate[1],scale=1/zgamma$estimate[2])

# Weibull
xdensityorig=dweibull(xeval,shape=zweibull$estimate[1],scale=zweibull$estimate[2])

#Visual plots..

par(mfrow=c(2,2))

zz=hist(xdata,freq=FALSE,plot=FALSE)
hist(xdata,xlab="Monthly rainfall", ylab="", freq=FALSE,
main="",ylim=range(c(zz$density,xdensityorig)))
lines(xeval, xdensityorig, col="red")
title(main="Fitted PDF")

# Empirical or SAMPLE quantiles and Empirical Percentiles..
empquant = sort(xdata)
emppercent = 1:N/(N+1)		#Weibull plotting position

# Get the quantiles corresponding to the empirical percentiles from the fitted
# PDF model

# Also get the model percentiles corresponding to the empirical quantiles 
# - i.e., (sorted data) from the fitted PDF model.
xdatasort = sort(xdata)		#Sorted original data

#If lognormal
modquant = qlnorm(emppercent,meanlog=mean(log(xdata)), sdlog=sd(log(xdata)))
modpercent = plnorm(xdatasort,meanlog=mean(log(xdata)), sdlog=sd(log(xdata)))

#normal
#modquant = qnorm(emppercent,mean=mean(xdata), sd=sd(xdata))
#modpercent = pnorm(xdatasort,mean=mean(xdata), sd=sd(xdata))

#exponential
#modquant = qexp(emppercent,rate=1/mean(xdata))
#modpercent = pexp(xdatasort,rate=1/mean(xdata))

# Gamma
modquant=qgamma(emppercent,shape=zgamma$estimate[1],scale=1/zgamma$estimate[2])
modpercent=pgamma(xdatasort,shape=zgamma$estimate[1],scale=1/zgamma$estimate[2])

# Weibull
#modquant=qweibull(emppercent,shape=zweibull$estimate[1],scale=zweibull$estimate[2])
#modpercent=pweibull(xdatasort,shape=zweibull$estimate[1],scale=zweibull$estimate[2])

# Plot the Empirical CDF with the Model CDF
plot(xdatasort, emppercent, xlab="Rainfall", ylab="CDF (F(x))")
lines(xdatasort, modpercent, col="red")

#Quantile plot..
# For a good fit the scatterplots should be on the 1:1 line
plot(modquant, empquant, xlab="Model (or Theoretical) Quantiles", ylab="Emp.
Quantiles")
lines(modquant, modquant)

#Probability Plot..
plot(modpercent, emppercent, xlab="Model (or Theoretical) Percentiles", ylab="Emp.
Percentiles")
lines(modpercent, modpercent)

###
# Normal Q-Q plot
# Plots the theoretical (or model) quantiles from a Standard Normal PDF
# vs the sample quantiles
#zz = qqnorm(xdata, xlab="Theoretical Quantiles", ylab="Empirical Quantiles")
#lines(zz$x, zz$y)