# Logistic Regression
# For problem 15-1 from the Helsel book

# Read the data locally:
# test = matrix(scan("prob15-1.txt", skip=1), ncol=5, byrow=T)
# OR from the course website:
test = matrix(scan("http://civil.colorado.edu/~balajir/CVEN5454/R-sessions/sess5/prob-15-1.txt", skip=1), ncol=5, byrow=T)

Y = test[,5]    # response variable - contamination, 0 or 1
X = test[,1:4]  # predictor variables

logcalib = glm(Y ~ X, family="binomial")
summary(logcalib)

Call:
glm(formula = Y ~ X, family = "binomial")

Deviance Residuals:
     Min        1Q    Median        3Q       Max
-2.09450  -0.33494  -0.08567  -0.04624   2.13239

Coefficients:
             Estimate Std. Error z value Pr(>|z|)
(Intercept) -13.20539    3.55814  -3.711 0.000206 ***
X1            0.51527    0.15094   3.414 0.000641 ***
X2            0.42909    0.27487   1.561 0.118506
X3            0.03035    0.32461   0.093 0.925515
X4            1.08951    0.29863   3.648 0.000264 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 91.474  on 123  degrees of freedom
Residual deviance: 49.704  on 119  degrees of freedom
AIC: 59.704

Number of Fisher Scoring iterations: 7

####################################################
# Variables X2 and X3 have high p-values, i.e., they are insignificant.
# We refit the model without these two variables.
# (A likelihood-ratio cross-check of this choice is sketched at the
# end of this script.)

X = cbind(test[,1], test[,4])  # drop the X2 and X3 predictor variables
logcalib = glm(Y ~ X, family="binomial")
summary(logcalib)

Call:
glm(formula = Y ~ X, family = "binomial")

Deviance Residuals:
     Min        1Q    Median        3Q       Max
-2.07135  -0.28760  -0.11266  -0.06592   2.05460

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept) -10.8904     2.4344  -4.474  7.7e-06 ***
X1            0.4636     0.1358   3.415 0.000638 ***
X2            1.0740     0.2830   3.795 0.000148 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 91.474  on 123  degrees of freedom
Residual deviance: 52.545  on 121  degrees of freedom
AIC: 58.545

Number of Fisher Scoring iterations: 7

# Note: X1 and X2 above are just the column labels of the new
# two-column matrix X; the second column is the original X4.

#################################################
# Notice that the AIC has also improved relative to the full model
# (58.545 vs. 59.704).

# A stepwise search by AIC selects a slightly different model: it keeps
# V2 along with V1 and V4, because dropping V2 would raise the AIC
# (57.713 below vs. 58.545 above), even though V2's p-value is high.

library(MASS)  # for stepAIC

X = test[,1:4]
X = as.data.frame(X)

# Full model
fullm = glm(Y ~ ., data=X, family="binomial")

# Do a stepwise selection by AIC
zstep = stepAIC(fullm)

# Output of the best model
summary(zstep)

Call:
glm(formula = Y ~ V1 + V2 + V4, family = "binomial", data = X)

Deviance Residuals:
     Min        1Q    Median        3Q       Max
-2.08079  -0.33644  -0.08457  -0.04780   2.14428

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept) -13.0457     3.0983  -4.211 2.55e-05 ***
V1            0.5161     0.1508   3.422 0.000622 ***
V2            0.4257     0.2712   1.569 0.116541
V4            1.0859     0.2952   3.679 0.000234 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 91.474  on 123  degrees of freedom
Residual deviance: 49.713  on 120  degrees of freedom
AIC: 57.713

Number of Fisher Scoring iterations: 7

#############################################################################
# Estimate using the best model
logpred = predict.glm(zstep, newdata=X, se.fit=T, type="response")

# logpred$fit contains the estimates (fitted probabilities) from the
# logistic regression; logpred$se.fit contains their standard errors.

# Note: unlike predict.lm(), predict.glm() does not accept an
# interval= argument, so interval="predict" would be silently ignored.
# An approximate confidence interval for the predicted probabilities
# can instead be built on the link scale -- see the sketch below.
##################################################################
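# A minimal sketch (an addition, not part of the original session) of
# how one could get approximate 95% confidence intervals for the
# predicted probabilities: predict on the link (logit) scale, where the
# normal approximation for the estimate is more defensible, then
# back-transform with plogis(). The object names zstep and X follow the
# session above; the 1.96 multiplier assumes normality on the link scale.

linkpred = predict.glm(zstep, newdata=X, se.fit=TRUE, type="link")

# Interval endpoints on the logit scale, mapped back to probabilities
ci.lower = plogis(linkpred$fit - 1.96 * linkpred$se.fit)
ci.upper = plogis(linkpred$fit + 1.96 * linkpred$se.fit)

# Fitted probabilities with their approximate interval bounds
cbind(p.hat = plogis(linkpred$fit), lower = ci.lower, upper = ci.upper)
##################################################################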
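# A sketch (again an addition, not in the original session) of a
# likelihood-ratio (deviance) test as a cross-check on dropping X2 and
# X3 by their p-values: compare the reduced model against the full
# model with anova(..., test="Chisq"). V1..V4 are the default
# data.frame column labels used above.

X = as.data.frame(test[,1:4])
fullm = glm(Y ~ V1 + V2 + V3 + V4, data=X, family="binomial")
redm  = glm(Y ~ V1 + V4,           data=X, family="binomial")

# A large p-value here would indicate that the dropped terms (V2, V3)
# do not significantly improve the fit
anova(redm, fullm, test="Chisq")
##################################################################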