library(MASS) library(nnet) library(VGAM) library(verification) libbrary(ggplot2) # retain top 20 variables to.retain=20 imp.vars.energy=as.character(read.csv("gbm.importance.50.energy.csv",header=TRUE)[1:to.retain,"var"]) imp.vars.inj.type=as.character(read.csv("gbm.importance.50.code.csv",header=TRUE)[1:to.retain,"var"]) imp.vars.body=as.character(read.csv("gbm.importance.50.body.csv",header=TRUE)[1:to.retain,"var"]) imp.vars.severity=c("crane","heavy.material.tool","stairs","unpowered.tool","exiting.transitioning","heavy.vehicle","uneven.surface","drill","ladder","object.at.height","improper.procedure.inattention","unpowered.transporter","machinery","improper.security.of.materials","working.at.height","no.improper.PPE","small.particle","steel.steel.sections","stripping","scaffold") # find column numbers corresponding to the important variables (where binary is some attribute and outcome data set) ## Read Body part data and select the important variables data = read.csv("data.body.part.csv") index=which(colnames(data)%in%imp.vars.body) Xpredictors = data[,index] bpart=data[,1] bpart=as.character(bpart) bpart[bpart == "head"]=1 bpart[bpart == "neck"]=2 bpart[bpart == "trunk"]=3 bpart[bpart == "upper extremities"]=4 bpart[bpart == "lower extremities"]=5 ### create binary vector bpartbin=as.numeric(bpart) #get variance matrix.. zs=var(Xpredictors) #do an Eigen decomposition.. zsvd=svd(zs) #Principal Components... pcs=t(t(zsvd$u) %*% t(Xpredictors)) #Eigen Values.. - fraction variance lambdas=(zsvd$d/sum(zsvd$d)) plot(1:20, lambdas[1:20], type="l", xlab="Modes", ylab="Frac. Var. explained") points(1:20, lambdas[1:20], col="red") ### Now fit Multinomial logistic regression using Xpredictors and ## bpartbin ### For the full model change the number of PCs and evaluate the skill ## you can pcs=cbind(bpartbin,pcs) pcs = data.frame(pcs) # zzfull = multinom(bpartbin ~ V1+V2+V3+V4+V5+V6+V7+V8+V9+V10+V11+V12+V13+V14+V15, data=pcs) zzfull = multinom(bpartbin ~ ., data=pcs) N = log(length(bpart)) ## BIC zbest=stepAIC(zzfull,k=N) #################### RPSS ypred = predict(zbest, type = "probs") # RPSS calculations acc2 = as.numeric(bpart) N = length(acc2) ### using verification package you get the same results.. # Empirical probabilities p1 <- length(acc2[acc2 == 1])/N p2 <- length(acc2[acc2 == 2])/N p3 <- length(acc2[acc2 == 3])/N p4 <- length(acc2[acc2 == 4])/N p5 <- length(acc2[acc2 == 5])/N climo <- c(p1, p2, p3,p4,p5) rps(acc2, ypred, baseline=climo )$rpss