{ #This file contains commands to implement LOcal polynomial fitting # from basic principles.. library(locfit) #ethanol data present within LOCFIT data(ethanol) x=ethanol$E y=ethanol$NOx x = 1:10 y=c(-1.4, 2.45, 1.45, 5.38, 5.6, 5.99, 8.1, 7.54, 8.24, 10.8) N=length(y) N1=N+1 x=as.matrix(x) #to create a matrix for x if there is only one variable #such as the case with f2.dat. If x is a matrix to begin with #this will not change anything.. #number of predictors np=length(x[1,]) #you will have an outer loop for alpha and also you can have one for the polynomial order #suppose you have the alphagrid set up #suppose we wish to fit a second order polynomial porder=1 #order of polynomial #number of parameters to be estimated - i.e. number of beta coefficients nparam=porder*np + 1 #set up a beta matrix - to save all of the beta coffecients for each of the #N observations betamatrix=matrix(0,ncol=nparam,nrow=N) # Suppose the best alpha is alphaopt=0.5 #therefore the best K will be K=round(alphaopt*N) yest=1:N #estimates yest1=1:N yestl=1:N yesta=1:N #The L (or Hat) matrix.. L=matrix(0,ncol=N,nrow=N) for(i in 1:N){ #the point at which you want the estimation. xp=x[i,] #calculate the distance between xp and all other points #if xp is a point that is not part of the data then you will simply append this to the x #matrix as such xnew=rbind(xp,x) xx=as.matrix(dist(xnew)) xdist=xx[1,2:N1] #add the first column to be 1 ax = t(t(x)-xp) xk=cbind(rep(1,N),ax,ax^2) #deg = 2 xk=cbind(rep(1,N),ax) # deg = 1 yk=y distorder=order(xdist) #weight the distances using a bisquare kernel distk=xdist[distorder[K]] #distance to Kth nearest neighbor xdist[xdist >= distk]=distk #replace all the distances beyond the #distance to the Kth neighbor to 1 so the weights will be 0 weights=15*((1-((xdist/distk)*(xdist/distk)))^2)/16 weights=weights/sum(weights) #points outside of the K neighbors gets a weight of 0 #create a diagonal matrix with these weights.. weights=diag(weights) betai=solve(t(xk)%*%weights%*%xk) %*% (t(xk) %*% weights %*% yk) # zz = lsfit(xk,yk,wt=diag(weights)) ## zz$coef and the coefficients in betai should match. ### You can also fit polynomial to the data points xkk=cbind(rep(1,N),x,x^2) #deg=2 xkk=cbind(rep(1,N),x) #deg = 1 betai1=solve(t(xkk)%*%weights%*%xkk) %*% (t(xkk) %*% weights %*% yk) # zz = lsfit(xkk,ykk,wt=diag(weights)) ## zz$coef and the coefficients in betai1 should match. # Get the vector of the L matrix for this 'ith' observation evect=rep(0,N) evect[i]=1 Lx=evect%*%xk%*%(solve(t(xk)%*%weights%*%xk) )%*% (t(xk) %*% weights) L[i,]=Lx yest[i] = sum((c(1,xp)) *betai1) # for a linear polynomial #yest[i] = sum((c(1,xp,xp^2)) *betai1) ## or yestl[i]=sum(Lx*y) ## or yesta[i] = betai[1] #if you want to save the betas for each of the data points betamatrix[i,]=betai1 } ## get L from locfit L1 = L for(i in 1:N){L1[i,]=locfit(y~x,alpha=alphaopt,deg=porder,ev=x[i,], kern="bisq", geth=1)} #compute the GCV for this alpha.. gcvalpha=(N*sum((y-yest)^2)) / ((N-sum(diag(L)))^2) # compute gcv from the gcvplot command zz=gcvplot(y ~ x, alpha= alphaopt, deg=porder, kern="bisq", ev=dat(), scale=TRUE) print(c('alpha, gcv, gcv from gcvplot', alphaopt, gcvalpha, zz$values)) }