library(kohonen) library(raster) library(rasterVis) library(maptools) library(RColorBrewer) # Set your working directory to the folder containing the datasets setwd('C:/Users/seanm/Documents/Teaching SOM/') # Wines Example ----------------------------------------------------------- set.seed(50) data(wines) head(wines) dim(wines) # Self-Organizing Map som.wines <- som(scale(wines), grid=somgrid(5,5,'rectangular'), rlen=100) # Plot of Vector Codes plot(som.wines) plot(som.wines,type='changes') # Plot number of observations in each node colors <- function(n, alpha = 1) { rev(heat.colors(n, alpha)) } plot(som.wines,type = "counts", palette.name = colors, heatkey = TRUE) # Plotting Points par(mfrow = c(1, 2)) plot(som.wines,type = "mapping", pchs = 20,main = "Mapping Type SOM") plot(som.wines,main = "Default SOM Plot") # Property Plot coolBlueHotRed <- function(n, alpha = 1) {rainbow(n, end=4/6, alpha=alpha)[n:1]} par(mfrow = c(1, 1)) plot(som.wines, type = "property", property = som.wines$codes[,1], main=names(som.wines$data)[1], palette.name=coolBlueHotRed) # Clustering Nodes # Hierarchical d <- dist(som.wines$codes) cah <- hclust(d,method="ward.D") plot(cah) groups <- cutree(cah,k=3) plot(som.wines,bgcol = rainbow(3)[groups], main="Hierarchical Clusters") add.cluster.boundaries(SOM,clustering=groups) # K means mydata <- som.wines$codes wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var)) for (i in 2:15) { wss[i] <- sum(kmeans(mydata, centers=i)$withinss) } plot(wss) som_cluster <- cutree(hclust(dist(som.wines$codes)), 4) plot(som.wines, type="codes", bgcol = rainbow(4)[som_cluster], main = "K-Means Clusters") add.cluster.boundaries(som.wines, som_cluster) # Toroidal Mapping som.wines2 <- som(scale(wines), grid = somgrid(5, 5, "hexagonal"), toroidal = TRUE) par(mfrow = c(1, 2)) plot(som.wines2, type = "mapping", pchs = 20, main = "Mapping Type SOM") plot(som.wines2, main = "Default SOM Plot") # Mapping Distance par(mfrow=c(1,1)) plot(som.wines2, type = "dist.neighbours", palette.name = terrain.colors) # Predictions ------------------------------------------------------------- training <- sample(nrow(wines), 120) Xtraining <- scale(wines[training, ]) Xtest <- scale(wines[-training, ], center = attr(Xtraining, "scaled:center"), scale = attr(Xtraining, "scaled:scale")) som.wines <- som(Xtraining, grid = somgrid(5, 5, "hexagonal")) som.prediction <- predict(som.wines, newdata = Xtest, trainX = Xtraining, trainY = factor(wine.classes[training])) table(wine.classes[-training], som.prediction$prediction) # Arctic Melt Onset SOM --------------------------------------------------- # EASE2.0 Coordinate Reference System ease <- '+proj=laea +lat_0=90 +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m' # read in data gph <- read.csv('gph_data.csv',header=T) # convert data frame into spatial object, then raster stack coordinates(gph) <- ~lon+lat gridded(gph)=TRUE gphstack <- stack(gph) data(wrld_simpl) crs(gphstack) <- crs(wrld_simpl) # re-project raster to EASE crs (twice to fix some missing points) gphstack <- projectRaster(gphstack, crs=CRS(ease)) gphstack <- projectRaster(gphstack, crs=CRS(ease)) # find locations with NA values and remove all_vals <- getValues(gphstack) index <- complete.cases(all_vals) vals <- all_vals[index,] # Perform SOM begin <- Sys.time() SOM <- som(scale(t(vals)), grid = somgrid(5,5,"rectangular")) end <- Sys.time() end-begin plot(SOM) # Plot number of observations in each node colors <- function(n, alpha = 1) { rev(heat.colors(n, alpha)) } plot(SOM, type = "counts", palette.name = colors, heatkey = TRUE) # Display composited GPH data into nodes all_vals <- t(all_vals) rlist <- list() for(i in 1:25){ tmp_df <- all_vals[which(SOM$unit.classif==i),] vals <- colMeans(tmp_df,na.rm=T) rlist[[i]] <- setValues(raster(gphstack,layer=1),vals) } rstack <- stack(rlist) # Create spatial polygons object to map land masses out <- crop(wrld_simpl, extent(-180, 180, 60, 83.57027)) wrld_arctic <- spTransform(out, CRS(ease)) # Crop for nicer maps e <- extent(-3400000,3400000,-3400000,3400000) rstack <- crop(rstack,e) mapTheme <- rasterTheme(region=rev(brewer.pal(11,'RdBu')), axis.line=list(col='transparent')) # Plot GPH data onto SOM map levelplot(rstack,margin=F, par.settings=mapTheme, main='GPH @ 500hpa Master SOM Map', at=seq(4850,5750,by=100), layout=c(5,5),scales=list(draw=FALSE), names.attr=c(1,2,3,4,5,rep('',20)), between=list(x=0.1,y=0.001)) + layer(sp.lines(wrld_arctic, col="grey20",lwd=0.2),under=F) # Map surface air temperature to Master SOM ------------------------------- temps <- read.csv('temperature_data.csv',header=T) # convert data frame into spatial object, then raster stack coordinates(temps) <- ~lon+lat gridded(temps)=TRUE tempsstack <- stack(temps) crs(tempsstack) <- crs(wrld_simpl) # re-project raster to EASE crs tempsstack <- projectRaster(tempsstack, crs=CRS(ease)) tempsstack <- projectRaster(tempsstack, crs=CRS(ease)) # get values from raster stack and transpose so columns are locations tempvals <- getValues(tempsstack) tempvals <- t(tempvals) # composite temperature days for each node rlist <- list() for(i in 1:25){ tmp_df <- tempvals[which(SOM$unit.classif==i),] vals <- colMeans(tmp_df,na.rm=T) rlist[[i]] <- setValues(raster(tempsstack,layer=1),vals) } rstack <- stack(rlist) e <- extent(-3400000,3400000,-3400000,3400000) rstack <- crop(rstack,e) mapTheme <- rasterTheme(region=rev(brewer.pal(11,'RdBu')), axis.line=list(col='transparent')) levelplot(rstack,margin=F, par.settings=mapTheme, main='Temperature @ 925hpa SOM Map', at=seq(240,295,by=5), layout=c(5,5),scales=list(draw=FALSE), names.attr=c(1,2,3,4,5,rep('',20)), between=list(x=0.1,y=0.001)) + layer(sp.lines(wrld_arctic, col="grey20",lwd=0.2),under=F)