#load packages
#install the packages before using install.packages("name of packages")
library(reshape2)
library(data.table) #to use the melt
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
library(ggplot2) #to plot, map_data, scale_fill_gradientn, etc
library(utils) #to read the data
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(sm) # for sm.density in diagnostics
## Package 'sm', version 2.2-5.7: type help(sm) for summary information
library(leaps) # to provide combinations
library(MPV) # for calculating AIC
## Loading required package: lattice
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
library(akima) # for interp to smooth for plotting
library(fields) # for surface to plot surface plot
## Loading required package: spam
## Loading required package: dotCall64
## Loading required package: grid
## Spam version 2.7-0 (2021-06-25) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
## Loading required package: viridis
## Loading required package: viridisLite
## See https://github.com/NCAR/Fields for
## an extensive vignette, other supplements and source code
library(mgcv) # for GAM
## Loading required package: nlme
## This is mgcv 1.8-37. For overview type 'help("mgcv-package")'.
library(locfit) # for fitting local polynomial
## locfit 1.5-9.4 2020-03-24
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:MPV':
##
## cement
## The following object is masked from 'package:sm':
##
## muscle
library(prodlim)
library(rgdal) # for converting projection of coordinates
## Loading required package: sp
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
##
## rgdal: version: 1.5-27, (SVN revision 1148)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.1/Resources/library/rgdal/proj
library(scales) # for Visualization spatial map
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
##################################################
########### graphic functions #################
##################################################
##### Quilt plotting code
Quilt_plotting=function(lon, lat, ypred, lon1, lat1, yob,save,type=""){
if (save==TRUE){
if (type=="binary"){
pdf("Predicted Binary Precipitation DEM Grid.pdf", width = 6, height = 8) # save figure
par(mfrow=c(2,1))
quilt.plot(lon, lat,ypred$fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Binary Precipitation on DEM Grid')
US( add=TRUE, col="black", lwd=2)
quilt.plot(lon1, lat1,yob,xlab="Longitude (m)",ylab="Latitude (m)",main='Actual Binary Precipitation ')
US( add=TRUE, col="black", lwd=2)
dev.off()
pdf("Predicted Standard Error DEM Grid.pdf", width = 8, height = 6) # save figure
quilt.plot(lon, lat,ypred$se.fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Standard Error on DEM Grid')
US( add=TRUE, col="black", lwd=2)
dev.off()
}else{
pdf("Predicted Precipitation DEM Grid.pdf", width = 6, height = 8) # save figure
par(mfrow=c(2,1))
quilt.plot(lon, lat,ypred$fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Precipitation on DEM Grid (mm)')
US( add=TRUE, col="black", lwd=2)
quilt.plot(lon1, lat1,yob,xlab="Longitude (m)",ylab="Latitude (m)",main='Actual Precipitation (mm)')
US( add=TRUE, col="black", lwd=2)
dev.off()
pdf("Predicted Standard Error DEM Grid.pdf", width = 8, height = 6) # save figure
quilt.plot(lon, lat,ypred$se.fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Standard Error on DEM Grid (mm)')
US( add=TRUE, col="black", lwd=2)
dev.off()
}
}else{
if (type=="binary"){
par(mfrow=c(2,1))
quilt.plot(lon, lat,ypred$fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Binary Precipitation on DEM Grid')
US( add=TRUE, col="black", lwd=2)
quilt.plot(lon1, lat1,yob,xlab="Longitude (m)",ylab="Latitude (m)",main='Actual Binary Precipitation')
US( add=TRUE, col="black", lwd=2)
quilt.plot(lon, lat,ypred$se.fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Standard Error on DEM Grid')
US( add=TRUE, col="black", lwd=2)
} else{
par(mfrow=c(2,1))
quilt.plot(lon, lat,ypred$fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Precipitation on DEM Grid (mm)')
US( add=TRUE, col="black", lwd=2)
quilt.plot(lon1, lat1,yob,xlab="Longitude (m)",ylab="Latitude (m)",main='Actual Precipitation (mm)')
US( add=TRUE, col="black", lwd=2)
quilt.plot(lon, lat,ypred$se.fit,xlab="Longitude (m)",ylab="Latitude (m)",main='Predicted Standard Error on DEM Grid (mm)')
US( add=TRUE, col="black", lwd=2)
}
}
}
##### surface plotting code
plot_surface =function(lon, lat, ypred, lon1, lat1, yob,save,type=""){
zz = interp(lon,lat,ypred$fit, duplicate = "strip")
zz2 = interp(lon1,lat1,yob, duplicate = "strip")
if (save==TRUE){
if (type=="binary"){
pdf("Predicted Binary Precipitation DEM Grid1.pdf", width = 6, height = 8) # save figure
par(mfrow=c(2,1))
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Binary Precipitation on DEM Grid'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$fit,yob), xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
surface(zz2,xlab="Longitude",ylab ="Latitude",main='Actual Binary Precipitation'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$fit,yob),xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
dev.off()
zz = interp(lon,lat,ypred$se, duplicate = "strip")
pdf("Predicted Standard Error DEM Grid1.pdf", width = 8, height = 6) # save figure
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Standard Error on DEM Grid'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$se),xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
dev.off()
}else{
pdf("Predicted Precipitation DEM Grid1.pdf", width = 6, height = 8) # save figure
par(mfrow=c(2,1))
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Precipitation on DEM Grid (mm)'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$fit,yob), xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
surface(zz2,xlab="Longitude",ylab ="Latitude",main='Actual Precipitation (mm)'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$fit,yob),xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
dev.off()
zz = interp(lon,lat,ypred$se, duplicate = "strip")
pdf("Predicted Standard Error DEM Grid1.pdf", width = 8, height = 6) # save figure
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Standard Error on DEM Grid (mm)'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$se),xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
dev.off()
}
}else{
if (type=="binary"){
par(mfrow=c(3,1))
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Binary Precipitation on DEM Grid'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$fit,yob), xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
zz = interp(lon,lat,ypred$se, duplicate = "strip")
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Standard Error on DEM Grid'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$se),xlim=range(lon,lon1),ylim=range(lat,lat1))
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
} else{
par(mfrow=c(1,1))
xlim=c(-115,-101.3)
ylim=c(29.5,42.5)
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Precipitation on DEM Grid(mm)'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$fit,yob), xlim = xlim, ylim = ylim)
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
zz = interp(lon,lat,ypred$se, duplicate = "strip")
surface(zz,xlab="Longitude",ylab ="Latitude",main='Predicted Standard Error on DEM Grid (mm)'
,horizontal = FALSE,legend.shrink = 0.9, zlim=range(ypred$se), xlim = xlim, ylim = ylim)
grid(col= "grey50", lty = "dashed")
US( add=TRUE, col="gray30", lwd=2)
}
}
}
##### Model dagnostics code
mod_diagnostics = function(y, yhat, nvar,save){
if (save==TRUE){
pdf("MODEL DIAGNOSTICS.pdf", width = 10, height = 8) # save figure
}
par(mfrow=c(2,2))
e <- y - yhat
nobs <- length(y)
coef <- nvar+1
## Testing if errors (residuals) are normal and iid
# 1. Normality histogram
hist(e,xlab="Residuals",ylab="Density",probability=T,main="Distribution of Residuals")
lines(sort(e),dnorm(sort(e),mean=0,sd=sd(e)),col="red")
sm.density(e,add=T,col="blue")
legend("topright", c("Normal Fit", "Non-parametric Fit"), lty=c(1,1), lwd=c(2.5,2.5), col=c("red", "blue"), cex = 0.5)
# 2. Normal QQplot
qqnorm(e,main="Normal Q-Q Plot of Residuals")
qqline(e)
## Testing for heteroskedasticity (which is constant variance of residuals)
# 3. Residuals versus estimates
plot(yhat,e,xlab="Estimated Precip",ylab="Residuals",main="Residuals vs Estimated Precip")
abline(0,0)
## Testing for autocorrelation. If errors are uncorrelated they fall between the dotted lines.
# 4. Autocorrelation plot
cor=acf(e,main="Autocorrelation Plot")
if (save==TRUE){
dev.off()
}
}
#######################################
############analysis function ########
######################################
loocv_func = function(bestmod,X,prec_obs,save) {
# Select only the data from the best model (not full model)
if (class(bestmod)[1]=="locfit"){
mod_data = X
N = length(prec_obs)
}else{
mod_data = bestmod$model
N = length(mod_data$prec_obs)
}
# Initialize leave one out cross validation vector
loocv = vector("numeric", nrow(mod_data))
# slightly different cases for different problems
if (class(bestmod)[1]=='lm') {
for (i in 1:nrow(mod_data)) {
drop_data = mod_data[-i,] # drop one precip value
drop_mod = lm(drop_data$prec_obs ~ ., data = drop_data)
x_pred = mod_data[i,]
loocv[i] = predict(drop_mod, x_pred)
}
} else if (class(bestmod)[1]=='glm') {
for (i in 1:nrow(mod_data)) {
drop_data = mod_data[-i,] # drop one precip value
drop_mod = glm(prec_obs ~ ., data = drop_data, family = bestmod$family)
x_pred = mod_data[i,]
loocv[i]=predict.glm(drop_mod, newdata=x_pred, se.fit=F,type="response")
}
} else if (class(bestmod)[1] == "gam") {
for(i in 1:nrow(mod_data)){
drop_data = mod_data[-i,] # drop one precip value
# names(drop_data) = c("lat","lon","elev")
drop_mod = gam(bestmod$formula, data = drop_data)
x_pred = mod_data[i,]
loocv[i] = predict.gam(drop_mod, newdata=x_pred, se.fit=F,type="response")
}
} else if (class(bestmod)[1]=="locfit"){
for(i in 1:nrow(mod_data)){
drop_data = mod_data[-i,] # drop one precip value
prec_obs1=prec_obs[-i]
# names(drop_data) = c("lat","lon","elev")
x_pred = mod_data[i,]
drop_mod=locfit(prec_obs1 ~., data = data.frame(drop_data), alpha=bestmod$call$alpha, maxk = 10000, deg=bestmod$call$deg,kern="bisq"
, scale = T, family=bestmod$call$family, link=bestmod$call$link)
loocv[i]=predict(drop_mod,newdata=x_pred)
}
}
# Plot LOOCV against observations
if (save==TRUE){
pdf("LOOCV.pdf", width = 8, height = 6) # save figure
lim = range(prec_obs, loocv)
plot(prec_obs, loocv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV" )
#lines(mod_data$prec_obs, mod_data$prec_obs, col = "red")
abline(a=0,b=1,col = "red")
dev.off()
}else{
par(mfrow=c(1,1))
lim = range(prec_obs, loocv)
plot(prec_obs, loocv, xlim = lim, ylim=lim, xlab="Observed", ylab="X-validated Estimate", main= "LOOCV" )
abline(a=0,b=1,col = "red")
}
}
Drop_10_pred = function(bestmod,X,prec_obs,save,family="") {
# Select only the data from the best model (not full model)
if (class(bestmod)[1]=="locfit"){
mod_data = X
N = length(prec_obs)
}else{
mod_data = bestmod$model
N = length(mod_data$prec_obs)
}
drop = 10
nsample = 500
i_full = 1:N
# initialize skill score vectors
skill_rmse = vector(mode="numeric", length=nsample)
skill_cor = vector(mode="numeric", length=nsample)
for (i in 1:nsample){
i_drop = sample(i_full,N*drop/100) # can add argument replace=TRUE
drop_dt = mod_data[-i_drop,] # drop 10% precip value
# slightly different cases for different problems
if (class(bestmod)[1]=='lm') {
fit_drop = lm(drop_dt$prec_obs ~ ., data = drop_dt)
drop_pred = predict(fit_drop, newdata=mod_data[i_drop,])
} else if (class(bestmod)[1]=='glm') {
drop_mod = glm(drop_dt$prec_obs ~ ., data = drop_dt, family = bestmod$family)
drop_pred =predict.glm(drop_mod, newdata=mod_data[i_drop,], se.fit=F,type="response")
} else if (class(bestmod)[1] == "gam") {
fit_drop = gam(bestmod$formula, data = drop_dt)
drop_pred = predict.gam(fit_drop, newdata=bestmod$model[i_drop,], se.fit=F,type="response")
} else if (class(bestmod)[1]=="locfit"){
if (family=="gaussian" | family=="gamma"){
prec_obs1=prec_obs[-i_drop]
##remove small values
prec_obs1[prec_obs1<=0.1]=0.1
drop_mod=locfit(prec_obs1 ~., data = data.frame(drop_dt), alpha=bestmod$call$alpha, maxk = 10000, deg=bestmod$call$deg,kern="bisq"
, scale = T, family=bestmod$call$family, link=bestmod$call$link)
drop_pred=predict(drop_mod,newdata=mod_data[i_drop,])
} else{
prec_obs1=prec_obs[-i_drop]
drop_mod=locfit(prec_obs1 ~., data = data.frame(drop_dt), alpha=bestmod$call$alpha, maxk = 10000, deg=bestmod$call$deg,kern="bisq")
drop_pred=predict(drop_mod,newdata=mod_data[i_drop,])
}
}
drop_actual = prec_obs[i_drop]
skill_rmse[i] = sqrt(mean((drop_actual - drop_pred)^2))
skill_cor[i] = cor(drop_actual,drop_pred)
}
# Plot skill of model based on Drop 10% method
if (save==TRUE){
pdf("boxplots.pdf", width = 8, height = 6) # save figure
par(mfrow=c(1,2))
boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
dev.off()
}else{
par(mfrow=c(1,2))
boxplot(skill_rmse, main = "RMSE-Skill", ylim = range(skill_rmse))
boxplot(skill_cor, main = "Cor-Skill", ylim=range(skill_cor))
}
}
##################################################
########### model fitting functions #################
##################################################
#### GLM Fitting ###########
### Find best GLM
GLM_fit = function(prec_obs, X, family) {
if (family == "gamma") {
links = c("log", "inverse","identity")
# clean data and remove zeros
prec_obs = ifelse(prec_obs <=0, runif(1, 0.0001, 0.001), prec_obs)
} else if (family == "binomial"){
links = c("logit", "probit", "cauchit")
} else if (family == "gaussian"){
links = c("identity")
}
N = length(prec_obs)
combs = leaps(X,prec_obs, nbest=25) # GEt upto 25 combinations for each
# number of predictors
combos = combs$which
ncombos = length(combos[,1])
glm_val=vector(length = length(links))
bestcombo=vector(length = length(links))
obj_func=1:ncombos
xmse = 1:ncombos
for(j in 1:length(links)) {
aux_var=1 # allow to fit glm
if (aux_var==1)
{for(i in 1:ncombos) {
xx = X[,combos[i,]]
xx=as.data.frame(xx)
names(xx) = names(X[combos[i,]])
if (family == "gamma"){
zz=glm(prec_obs ~ ., data=xx, family = Gamma(link=links[j]))
}else if (family == "binomial"){
zz=glm(prec_obs ~ ., data=xx, family = binomial(link=links[j]))
}else if (family == "gaussian"){
zz=glm(prec_obs ~ ., data=xx, family = gaussian(link=links[j]))
}
obj_func[i]=AIC(zz)
xmse[i] = sum((zz$res)^2) / (N - length(zz$coef))
}}
if (aux_var==1){
# Test using AIC objective function
glm_val[j]=min(obj_func)
bestcombo[j]=which.min(obj_func)
}else{
# Test using AIC objective function
glm_val[j]=200000
bestcombo[j]=which.min(obj_func)
}
}
press_df = data.frame(glm_val)
rownames(press_df) = links[1:length(links)]
print("Results of AIC for bestfit GLM")
print(press_df)
sprintf("Choosing the GLM which minimizes AIC: %s family and %s link function.", family, links[which.min(glm_val)])
xx = X[,combos[bestcombo[which.min(glm_val)],]]
xx = data.frame(xx)
names(xx) = names(X[combos[bestcombo[which.min(glm_val)],]])
if (family == "gamma") {
bestmod = glm(prec_obs ~ ., data = xx, family = Gamma(link=links[which.min(glm_val)]))
} else if (family == "binomial") {
bestmod = glm(prec_obs ~ ., data = xx, family = binomial(link=links[which.min(glm_val)]))
} else if (family == "gaussian") {
bestmod = glm(prec_obs ~ ., data = xx, family = gaussian(link=links[which.min(glm_val)]))
} else {
print("Error!")
}
bestmod$Call$LINK=links[which.min(glm_val)]
bestmod$Call$AIC=AIC(bestmod)
bestmod$Call$combo=combos[bestcombo[which.min(glm_val)],]
return(bestmod)
}
##### Local Polynomial Fitting ######
#search for best alpha over a range of alpha values between 0 and 1
locpoly_fit = function(prec_obs, X, family="", link="", glm=FALSE,plot=FALSE) {
nvar=length(X[1,]) #number of variables
N=length(prec_obs) #number ofdata points
if(glm==TRUE) {
if (family == "gamma") {
prec_obs = ifelse(prec_obs <=0, runif(1, 0.0001, 0.001), prec_obs)
}
porder=1
minalpha=0.6
alpha_grid=seq(minalpha,1.0,by=0.05)
n=length(alpha_grid)
porder=2
minalpha=0.6
alpha1_grid=seq(minalpha,1.0,by=0.05)
alpha=alpha2_grid=c(alpha_grid,alpha1_grid)
#get the GCV values for all the alpha values in alpha for order of
# polynomial = 1 and 2. kern="bisq" argument is to use the bisquare kernel
# in computing the weights of the neighbors, which are then used in
# the weighted least squares..
gcv_deg1=gcvplot(prec_obs ~ ., data=X, maxk = 100000, alpha=alpha_grid,deg=1,kern="bisq", ev=dat(),scale=TRUE,family=family,link=link)
gcv_deg2=gcvplot(prec_obs ~ ., data=X, maxk = 100000, alpha=alpha1_grid,deg=2,kern="bisq",ev=dat(),scale=TRUE,family=family,link=link)
# pick the best alpha and the degree of the polynomial that
# gives the least GCV
z2=order(c(gcv_deg1$values,gcv_deg2$values))
bestdeg=1
if(z2[1] > n)bestdeg=2
best_alpha = alpha2_grid[z2[1]]
best_gcv = c(gcv_deg1$values,gcv_deg2$values)[z2[1]]
output=c(bestdeg, best_alpha, best_gcv) #the best parameter set
# Now fit the LOCFIT model using the best alpha and degree obtained from above..
if (plot==FALSE){
bestmod=locfit(prec_obs ~., data=X, alpha=best_alpha, maxk = 10000, deg=bestdeg,kern="bisq"
, scale = T, family=family, link=link,ev=dat())
}else {
bestmod=locfit(prec_obs ~., data=X, alpha=best_alpha, maxk = 10000, deg=bestdeg,kern="bisq"
, scale = T, family=family, link=link)
}
bestmod$call$alpha = best_alpha
bestmod$call$deg = bestdeg
bestmod$call$family = family
bestmod$call$link = link
bestmod$call$gcv = best_gcv
}
else{
porder=1
minalpha=2*(nvar*porder+1)/N
alpha_grid=seq(minalpha,1.0,by=0.05)
n=length(alpha_grid)
porder=2
minalpha=2*(nvar*porder+1)/N
alpha1_grid=seq(minalpha,1.0,by=0.05)
alpha=alpha2_grid=c(alpha_grid,alpha1_grid)
#get the GCV values for all the alpha values in alpha for order of
# polynomial = 1 and 2. kern="bisq" argument is to use the bisquare kernel
# in computing the weights of the neighbors, which are then used in
# the weighted least squares..
gcv_deg1=gcvplot(prec_obs ~ ., data=X, maxk = 100000, alpha=alpha_grid,deg=1,kern="bisq", ev=dat(),scale=TRUE)
gcv_deg2=gcvplot(prec_obs ~ ., data=X, maxk = 100000, alpha=alpha1_grid,deg=2,kern="bisq",ev=dat(),scale=TRUE)
# pick the best alpha and the degree of the polynomial that
# gives the least GCV
z2=order(c(gcv_deg1$values,gcv_deg2$values))
bestdeg=1
if(z2[1] > n)bestdeg=2
best_alpha = alpha2_grid[z2[1]]
best_gcv = c(gcv_deg1$values,gcv_deg2$values)[z2[1]]
output=c(bestdeg, best_alpha, best_gcv) #the best parameter set
# Now fit the LOCFIT model using the best alpha and degree obtained from above..
if (plot==FALSE){
bestmod=locfit(prec_obs ~., data=X, alpha=best_alpha, maxk = 10000, deg=bestdeg,kern="bisq",ev=dat())
}else{
bestmod=locfit(prec_obs ~., data=X, alpha=best_alpha, maxk = 10000, deg=bestdeg,kern="bisq")
}
bestmod$call$alpha = best_alpha
bestmod$call$deg = bestdeg
bestmod$call$gcv = best_gcv
}
return(bestmod)
}
### F-test for Local Polynomial Fitting ###
loc_Ftest = function(prec_obs,prec_hat,X, bestmod) {
N=length(prec_obs) #number ofdata points
RSS1 = sum((prec_obs-prec_hat$fit)^2)
nu1 = bestmod$dp[6] # trace(L) [lk]
nu2 = bestmod$dp[7] # trace(L^T L) [df1]
nu11 = N-2*nu1 + nu2
#linear regression ###
# #linear regression
X=as.matrix(X)
zzLin=lm(prec_obs~X)
XX = cbind(rep(1,N), X)
# Compute the Hat matrix
hatm = XX %*% solve(t(XX) %*% XX) %*% t(XX)
II = diag(N)
delta0 = t(II-hatm)%*%(II-hatm) #Equation 9.2
nu00 = sum(diag(delta0))
RSS0 = sum(residuals(zzLin)^2)
Fdata = (RSS0 - RSS1)/(nu00 - nu11)
Fdata = (Fdata / (RSS1 / nu11))
Ftheor = qf(0.95,(nu00-nu11), nu11) #95% confidence level..
## Fdata > Ftheor - reject null - i.e., data is otherwise (local polynomial)
if (Fdata > Ftheor) {
print("F-test:")
print(sprintf("Reject the Null because F(local poly) = %0.2f > %0.2f = F(linear model).", Fdata, Ftheor))
}
}