# CVEN 5454 - Lecture 2, 01/16/2020
# Part I
# In this code, we'll go over some introductory concepts for using R.
# Then we'll implement the following statistical measures:
# Mean, median, variance, standard deviation, IQR, skewness.
# We'll also show how to plot boxplots, histograms, and scatterplots
# in base R.
################################################################################

# First, we'll create some example data by combining a series of values
x1 <- c(2, 4, 8, 9, 11, 11, 120) # note, c() combines values into a vector in R.

# check out the data, make sure it's what we think it is
x1         # print the vector
class(x1)  # check the data type
length(x1) # find the length of the data
dim(x1)    # dimensions; NULL here because a plain vector has no dim attribute

# Other ways to "get data" are to download it from online:
rain_url <- "http://civil.colorado.edu/~balajir/r-session-files/aismr.txt"

# The download needs a network connection. If it fails we report why and set
# `rain` to NULL so the rest of the script can still be run.
rain <- tryCatch(
  read.table(rain_url),
  error = function(e) {
    message("Could not download ", rain_url, ": ", conditionMessage(e))
    NULL
  }
)

help(read.table) # get help for a certain command

if (!is.null(rain)) {
  # View() opens a spreadsheet-style viewer, so only call it when a person is
  # at the console.
  if (interactive()) View(rain)
  print(class(rain))

  # name the columns
  names(rain) <- c(
    "Year", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  )

  # Indexing columns of a dataframe
  # use the '$' to index a single column of a dataframe
  if (interactive()) View(rain$Year)

  # Turn the rain dataframe back to a matrix and see what happens
  # (the previous `dimnames = NULL` argument was ignored by as.matrix() for
  # data frames, so it has been dropped)
  rain_matrix <- as.matrix(rain)
  print(class(rain_matrix))

  # uh oh, we can't use the '$' to index a matrix.
  # try() prints the error message but lets the script keep going.
  try(rain_matrix$Year)

  # Indexing a matrix
  years <- rain_matrix[, 1]     # indexes the first column
  year_1872 <- rain_matrix[2, ] # indexes the second row
}

# What about importing data from a file on your own computer?
# Determine your working directory
getwd()

# try to read in data from file
# This fails unless "aismr.txt" happens to be in the working directory printed
# above. try() shows the error without stopping the script; on failure `rain2`
# holds a "try-error" object instead of a data frame.
rain2 <- try(read.table("aismr.txt"))

# set your working directory
# NOTE: this path is specific to one computer -- change it to the folder where
# you saved "aismr.txt". The directory is only changed when it exists.
course_dir <- "C:/Users/mdero/Documents/CVEN_5454"
if (dir.exists(course_dir)) {
  setwd(course_dir)
} else {
  message("Directory not found, working directory unchanged: ", course_dir)
}

# try to read in data again
rain2 <- try(read.table("aismr.txt"))

################################################################################
# Now we'll show the R commands for some simple statistics

x1 <- c(2, 4, 8, 9, 11, 11, 120)

# calculate the mean (built in R functions)
x1_mean <- mean(x1)

# calculate the median
x1_median <- median(x1)

# calculate the variance
x1_var <- var(x1)

# calculate the std. dev
x1_stdev <- sd(x1)

# check that std. dev is square root of variance
print(x1_stdev)
print(sqrt(x1_var))

# calculate the IQR
x1_IQR <- IQR(x1)

# calculate the skew, but first we need an extra R package
# install.packages("e1071") # run once if the package is not installed yet
if (requireNamespace("e1071", quietly = TRUE)) {
  library(e1071)
  x1_skew <- skewness(x1)
} else {
  message("Package 'e1071' is not installed; skipping the skewness example.")
}

################################################################################
# Next we'll introduce histograms in base R:

# Below is some fake data measurements of total organic carbon (TOC) in ppm
toc <- c(21, 18, 43, 21, 11, 18, 22, 19, 24, 25, 15, 20, 26, 23, 22, 18, 27)

# a histogram is a way to represent the distribution of a variable via "binning"
hist(toc) # very basic histogram in 'base R'

# use some additional arguments like specifying number of breaks, x label, title
# (a single number for `breaks` is only a suggestion; hist() chooses "pretty"
# break points close to it)
hist(x = toc, breaks = 5, xlab = "TOC (ppm)", main = "")

################################################################################
# Now, we'll do boxplots in base R:
# A boxplot is another method for representing a distribution of a variable
# (more condensed than a histogram)

# Boxplot notes:
# - middle line is the median
# - box spans roughly the 25th to 75th percentiles of the data
# - whiskers reach the most extreme points within 1.5 * IQR of the box
# - any points beyond the whiskers are plotted individually as outliers
boxplot(toc) # looks dumb!
# resize to use elsewhere
# Same boxplot of `toc`, now with an axis label and a title.
boxplot(x = toc, xlab = "", ylab = "TOC (ppm)", main = "Boxplot of TOC")

################################################################################
# Last, we'll do a scatterplot in base R

# generate some random data from a normal distribution (length = 10)
# (no seed is set, so the points differ on every run)
x <- rnorm(n = 10, mean = 0, sd = 1)
y <- rnorm(n = 10, mean = 0, sd = 1)

# generate a very simple scatterplot (pch = 16 draws filled circles)
plot(x, y, pch = 16)

# add a line with an intercept of zero and slope of one
abline(a = 0, b = 1, col = "red")