HMM
# dthmm
dthmm <-
function (x, Pi, delta, distn, pm, pn = NULL, discrete = NULL,
nonstat = TRUE)
{
if (is.null(discrete)) {
if (distn == "beta" | distn == "exp" | distn == "gamma" |
distn == "lnorm" | distn == "logis" | distn == "norm")
discrete <- FALSE
else if (distn == "binom" | distn == "pois")
discrete <- TRUE
else stop("parameter discrete must be used when applying user distributions")
}
y <- c(list(x = x, Pi = Pi, delta = delta, distn = distn,
pm = pm, pn = pn, discrete = discrete, nonstat = nonstat))
class(y) <- "dthmm"
return(y)
}
get.named.parlist <- function(x,m,dist,ic,...){
require(MASS)
fit <- fitdistr(x,dist,...)
np <- length(fit$estimate)
pars <- vector('list',np)
names(pars) <- names(fit$estimate)
init <- lapply(fit$estimate,max)
names(init) <- names(fit$estimate)
for(j in 1:m){
#print(j)
#browser()
#browser()
this.fit <- fitdistr(x[ntile.ts(x,m) == (j-1)],dist,init,...)
#for(k in 1:np)
# pars[[k]][j] <- this.fit$estimate[k]
for(k in 1:np)
pars[[k]][j] <- fit$estimate[k]
if(dist == 'normal'){
if(ic == 'same.both'){
pars[[k]][j] <- mean(x)
pars[[k]][j] <- sd(x)
} else if( ic == 'same.sd'){
pars[[k]][j] <- mean(x[ntile.ts(x,m) == (j-1)])
pars[[k]][j] <- sd(x)
}else{
pars[[k]][j] <- mean(x[ntile.ts(x,m) == (j-1)])
pars[[k]][j] <- sd(x[ntile.ts(x,m) == (j-1)])
}
}
}
pars
}
Pi_init <- function(n,type='uniform'){
matrix(rep(1/n,n^2),n)
}
delta_init <- function(n, type='uniform'){
d <- rnorm(n)^2
d/sum(d)
}
ntile.ts <-
function(x, n, limit.type = 'prob', tie = 1, altobs = NULL ){
# returns an integer vector corresponding to n states broken by equal
# probability or equal distance
#
limit <-
if(limit.type == 'prob')
quantile(x,seq(0,1,1/n))
else if(limit.type == 'equal')
seq(min(x),max(x),by=diff(range(x))/n)
if(!is.null(altobs)) limit <- quantile(altobs,seq(0,1,1/n))
b <- integer(length(x))
for(i in 1:(n+1)){
filter <-
if(tie == 1)
x >= limit[i] & x <= limit[i+1]
else
x > limit[i] & x <= limit[i+1]
#only need to set the 1's because b is already 0's
b[filter] <- as.integer(i-1)
}
if(class(x) == 'ts')
return(ts(b,start=start(x),end=end(x)))
else
return(b)
}
# AIC (or BIC) for HMM
AIC_hmm=function(hmm, k=2){
LL=hmm$LL
n=length(hmm$x)
n_pdf=length(hmm$pm) # number of parameters to fit one marginal distribution
n_states=nrow(hmm$Pi) # number of states
n_transition=nrow(hmm$Pi)*ncol(hmm$Pi) # number of transition probabilities
npar=n_states*n_pdf+n_transition
return(-2*LL+k*npar)
}
ggplot_stationary_hmm <- function(x,binwidth=NULL,res=1000,cols=NULL,...){
m <- length(x$delta)
dens <- matrix(0,nrow=m+1,ncol=res)
r <- extendrange(x$x,f=.05)
xrange <- seq(r[1],r[2],len=res)
delta <- statdist(x$Pi)
if(is.null(binwidth)) binwidth <- diff(range(x$x))/8
for(i in 1:m){
if(x$distn == 'gamma'){
dens[i,] <- delta[i]*dgamma(xrange,shape=x$pm$shape[i],rate=x$pm$rate[i])
}else if(x$distn == 'norm'){
dens[i,] <- delta[i]*dnorm(xrange,mean=x$pm$mean[i],sd=x$pm$sd[i])
}else{
stop('Distribution not supported')
}
dens[m+1,] <- dens[m+1,] + dens[i,]
}
p <- ggplot()+
geom_histogram(data=data.frame(x=as.vector(x$x)),aes(x=x,y=..density..),
binwidth=binwidth,fill='white',color='black')+
theme_bw()
dt <- data.table(x=numeric(0),y=numeric(0), state=integer(0))
for(i in 1:m)
dt <- rbind(dt, data.table(x=xrange,y=dens[i,], state=i))
dt$state <- factor(dt$state)
p <- p + geom_line(data=dt,aes(x=x,y=y,color=state)) +
geom_line(data=data.frame(x=xrange,y=dens[m+1,]),aes(x=x,y=y),color='black',size=1) +
scale_color_tableau() +
scale_x_continuous(limits=r)
p
}
statdist <- function(tpm){
m <- nrow(tpm)
ones <- rbind(rep(1,m))
I <- diag(rep(1,m))
U <- matrix(rep(1,m^2),m)
as.vector(ones %*% solve(I - tpm + U))
}
# Find best GLM
GLM_fit = function(Pm, X, family, month) {
if (family == "gamma") {
links = c("log", "inverse","identity")
# clean data and remove zeros
Pm = ifelse(Pm <=0, runif(1, 0.0001, 0.001), Pm)
} else if (family == "binomial"){
links = c("logit", "probit", "cauchit")
} else if (family == "gaussian"){
links = c("identity")
}
N = length(Pm)
combs = leaps(X,Pm, nbest=25) # GEt upto 25 combinations for each
# number of predictors
combos = combs$which
ncombos = length(combos[,1])
glm_press=vector(length = length(links))
best_comb=vector(length = length(links))
xpress=1:ncombos
xmse = 1:ncombos
for(j in 1:length(links)) {
aux_var=1 # allow to fit glm
##remove small values for gamma distribution
if (family == "gamma"){
if (links[j]=="identity"){
if( month==1){
Pm[Pm<=0.005]=0.005
}else{
Pm[Pm<=2.5]=2.5
}
}else if (links[j]=="inverse" & month==1){
Pm[Pm<=25]=25 # it is necessary to modific significantly the small values,
#so, "inverse" is not fitted for January
aux_var=0
}
}
if (aux_var==1)
{for(i in 1:ncombos) {
xx = X[,combos[i,]]
xx=as.data.frame(xx)
if (family == "gamma"){
zz=glm(Pm ~ ., data=xx, family = Gamma(link=links[j]), maxit=500)
}else if (family == "binomial"){
zz=glm(Pm ~ ., data=xx, family = binomial(link=links[j]), maxit=500)
}else if (family == "gaussian"){
zz=glm(Pm ~ ., data=xx, family = gaussian(link=links[j]), maxit=500)
}
xpress[i]=AIC(zz)
xmse[i] = sum((zz$res)^2) / (N - length(zz$coef))
#print(xpress[i])
}}
if (aux_var==1){
# Test using AIC objective function
glm_press[j]=min(xpress)
best_comb[j]=which.min(xpress)
}else{
# Test using AIC objective function
glm_press[j]=200000
best_comb[j]=which.min(xpress)
}
}
press_df = data.frame(glm_press)
rownames(press_df) = links[1:length(links)]
print("Results of AIC for bestfit GLM")
print(press_df)
print(best_comb[which.min(glm_press)])
print(sprintf("Choosing the GLM which minimizes AIC: %s family and %s link function.", family,
links[which.min(glm_press)]))
xx = X[,combos[best_comb[which.min(glm_press)],]]
xx=as.data.frame(xx)
if (family == "gamma") {
bestmod = glm(Pm ~ ., data = xx, family = Gamma(link=links[which.min(glm_press)]))
} else if (family == "binomial") {
bestmod = glm(Pm ~ ., data = xx, family = binomial(link=links[which.min(glm_press)]))
} else if (family == "gaussian") {
bestmod = glm(Pm ~ ., data = xx, family = gaussian(link=links[which.min(glm_press)]))
} else {
print("Error!")
}
bestmod$Call$LINK=links[which.min(glm_press)]
bestmod$Call$AIC=AIC(bestmod)
bestmod$Call$combo=combos[best_comb[which.min(glm_press)],]
return(bestmod)
}