# read in the data file as a matrix of 13 columns # first column is the year and the next 12 are the monthly # rainfall values #commands to read data, calculate basic monthly statistics, plot #them for each month and also boxplots and histograms #commands to perform robust stats - median, IQR, etc. #commands to do a bivariate histogram ### source some functions.. ## function to compute skew source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew.r") ## function to compute skew and interquartile range together source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/skew-iqr.r") ## functions to produce boxplots with whiskers at 5th and 95th percentile source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot.r") source("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess1/myboxplot-stats.r") #read in data.. rain=matrix(scan("http://civil.colorado.edu/~balajir/r-session-files/aismr.txt"),ncol=13,byrow=T) #pick the monthly rainfall values excluding the year. rain1=rain[,2:13] years=rain[,1] #1871 - 1999 N=length(years) #of years N1=N-1 #calculate monthly mean/median, variance/iqr and skews zmean=1:12 zmedian=1:12 zvars=1:12 ziqr=1:12 zskew=1:12 for(i in 1:12){ zmean[i]=mean(rain1[,i]) zmedian[i]=quantile(rain1[,i],c(0.5)) zvars[i]=var(rain1[,i]) ziqr[i]=diff(quantile(rain1[,i],c(0.25,0.75))) zskew[i]=skew(rain1[,i]) } #or zmean=colSums(rain1)/N #or zmean=apply(rain1,2,mean) zvars=apply(rain1,2,var) zskew=apply(rain1,2,skew) par(mfrow=c(3,1)) months=c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") months=mon.abb plot(1:12,zmean,xlab="Months",ylab="Mean Rainfall", type="l", axes=F) axis(1,at=1:12,labels=months) axis(2) box() title(main="Monthly Mean Rainfall") plot(1:12,zvars,xlab="Months",ylab="Var Rainfall", type="l", axes=F) axis(1,at=1:12,labels=months) axis(2) box() title(main="Monthly Variance of Rainfall") plot(1:12,zskew,xlab="Months",ylab="Skew of Rainfall", type="l", axes=F) axis(1,at=1:12,labels=months) axis(2) box() title(main="Monthly Skew of Rainfall") #boxplot the raw data par(mfrow=c(1,1)) rain2=as.data.frame(rain1) xs=1:12 zz=myboxplot(split(t(rain2),xs),plot=F,cex=1.0) zz$names=rep(" ",length(zz$names)) z1=bxp(zz,ylim=range(rain2,zmean),xlab="Months",ylab="Monthly rainfall",axes=F) box(lty="solid",lwd=2) axis(1,at=z1,labels=months) axis(2) points(z1,zmean,col='blue',pch=16) lines(z1,zmean,lty=1,lwd=2,col='blue') ##### or if you have the data as data frame you can do the above much more efficiently rain=read.table("http://civil.colorado.edu/~balajir/r-session-files/aismr.txt") names(rain)=c('Year',month.abb) rain1=rain[,2:13] boxplot(rain1,xlab="Months",ylab="Monthly rainfall") points(zmean,col='blue',pch=16) lines(zmean,col='blue',lwd=2) #Plot histogram of each month and comment on the features.. par(mfrow=c(3,4)) for(i in 1:12){ hist(rain1[,i],xlab="Monthly rainfall", ylab="", freq=FALSE, main="") title(main=c(months[i],' rainfall')) box() } #Plot histogram of each month using hplot in the library Rlab library(Rlab) N = dim(rain1)[1] nclass = round(log(N,2) + 1) Sturge's formula par(mfrow=c(3,4)) for(i in 1:12){ hplot(rain1[,i],nclass=ncls, xlab="Monthly rainfall", ylab="", freq=FALSE, main="") title(main=c(months[i],' rainfall')) box() } #to get wateryear from a matrix of values that corresponds to Jan ~ Dec.. zmat=cbind(rain1[1:N1,10:12],rain1[2:N,1:9]) wateryearsum=apply(zmat,1,sum) plot(years[2:N],wateryearsum,xlab="Years",ylab="Water year rainfall", type="l") abline(h=mean(wateryearsum)) #read the NINO3 index.. test=matrix(scan("nino3-index.txt"),ncol=3,byrow=T) nino3=matrix(test[,3],ncol=12,byrow=T) #get the nino3 of the summer (June - Sep) season nino3ses=apply(nino3[,6:9],1,mean) nino3years=unique(test[,1]) #1900 to 2004 #get the seasonal rainfall for the common years - i.e. 1900 - 1999 N=1999-1900+1 N2=1900-1871+1 N3=N2+N-1 sesrain=apply(rain1[,6:9],1,mean) sesrain=sesrain[N2:N3] nino3ses=nino3ses[1:N] X=sesrain[nino3ses >= 0.75] Y=sesrain[nino3ses <= -0.75] myboxplot(X,Y,axes=F) groups=c("El Nino years", "La Nina years") axis(1,at=1:2,labels=groups) axis(2) box() ############################# #Quantile Plots #-------------- #Quantile plots visually potray the quantiles, or percentiles #(which equal the quantiles times 100) of the distribution of #sample data. Quantile plots are sample approximations of the #cumulative distribution function (cdf) of a continuous random #variable. probs=seq(0.0,1.0,0.1) z=quantile(sesrain, prob=probs) plot(z, probs, xlab="Summer Rain", ylab="Cumulative Frequency", type="l") title(main=list("Quantile plot of All India Summer Rainfall")) ## Q-Q plots ## These plot the sample and theoretical quantiles (from the PDF of interest) #Variations of Quantile Plots -- Plotting positions #-------------------------------------------------- #Plotting position, p = (i - a)/(n + 1 - 2a) #Weibull, a=0; p = i/(n+1) # get the sample CDF weibp = rank(sort(sesrain))/(length(sesrain)+1) ## get the theoretical quantiles ## suppose we wish to check for Normal PDF norquant = qnorm(weibp, mean=mean(sesrain), sd=sd(sesrain)) plot(norquant, sort(sesrain), xlab="Theoretical Quantiles", ylab="Sample Quantiles",log="x") lines(sort(sesrain), sort(sesrain)) ## Similarly you can do a QQ plot for any distribution - by using the appropriate commands ### in computing the theoretical quantiles