Cluster Analysis - SOM

Apply self-organizing maps (SOM) to the Western US winter seasonal precipitation data. Try 3 x3 or 4 x 4 node configuration. For each node composite the winter SSTs. This will provide insights into the relationship between the SSTs and their signature on western US winter precipitation.

#### CLEAR MEMORY
rm(list=ls())
graphics.off()
#### Prevent Warnings
options(warn=-1)

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="C:/Users/DELL/Dropbox/Boulder/Courses/Advance data analysis/HW/HW2"
setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW2.R"))
source("Lib_HW2.R")
# unload packages that create a conflict with map("world2",add=T)
detach(package:mclust)
detach(package:sm)

Step to create SST data

Lat - Long grid creation

# information obtained from: http://iridl.ldeo.columbia.edu/SOURCES/.KAPLAN/.EXTENDED/.v2/ 
nrows=72
ncols=36
ntime = 118    #Nov-Mar 1901 - Nov-Mar 2018

### Lat - Long grid..
ygrid=seq(-87.5,87.5,by=5)
ny=length(ygrid) # number of latitudinal locations

xgrid=seq(27.5,382.5,by=5)
nx=length(xgrid) # number of longitudinal locations
nglobe = nrows*ncols
# creation of spatial grid
xygrid=matrix(0,nrow=nx*ny,ncol=2)
i=0
for(iy in 1:ny){
  for(ix in 1:nx){
    i=i+1
    xygrid[i,1]=ygrid[iy]
    xygrid[i,2]=xgrid[ix]
  }
}

Read Kaplan SST data..

data=readBin("./data/Kaplan-SST-NDJFM1900-NDJFM2018.r4",what="numeric", n=( nrows * ncols * ntime), size=4,endian="swap")

data <- array(data = data, dim=c( nrows, ncols, ntime ) )
data1=data[,,1]

# the lat -long data grid..

index=1:(nx*ny)

index1=index[data1 < 20 & data1 != "NaN"]   # only non-missing data.
xygrid1=xygrid[index1,]

nsites=length(index1)
data2=data1[index1]

### SSTdata matrix - rows are seasonal (i.e. one value per year)
## and columns are locations
sstannavg=matrix(NA,nrow=ntime, ncol=nsites)

for(i in 1:ntime){
  data1=data[,,i]
  index1=index[data1 < 20 & data1 != "NaN"]
  data2=data1[index1]
  sstannavg[i,]=data2
}  

indexgrid = index1
rm("data")  #remove the object data to clear up space

lon = sort(unique(xygrid[,2]))
lat = sort(unique(xygrid[,1]))

Read Winter Seasonal Maximun Precipitation data. only station with 67 years of data are considered (1948-2014)

# read in data
data= read.csv('./data/PAM_western_us_1.csv',header=T)

Perform SOM for 3x3

Ng=3
N_node=Ng*Ng#number of nodes
a=c(1,2)
Pm=t(data[,-a])
index=complete.cases(Pm)
vals=Pm[index,]
begin <- Sys.time()
SOM <- som(scale(vals),
           grid = somgrid(Ng,Ng,"rectangular"))
end <- Sys.time()
end-begin
## Time difference of 0.07295895 secs
#plot(SOM)

# # Plot number of observations in each node
colors <- function(n, alpha = 1) {
  rev(heat.colors(n, alpha))
}
plot(SOM, type = "counts",
     palette.name = colors,
     heatkey = TRUE)

Display composited WSMP data into nodes

prec_nodes=as.data.frame(array(NaN, dim = c(dim(Pm)[2],N_node)))
for(i in 1:N_node){
  index=which(SOM$unit.classif==i)
  tmp_df=Pm[index,]
  if(length(index)>1){
    prec_nodes[,i]=colMeans(tmp_df,na.rm=T)
  }else{
    prec_nodes[,i]=tmp_df
  }
}

zr=range(prec_nodes)
#plot composited WSMP data for each node
Nx=50
par( oma=c(0,2,3,6)) # save some room for the legend
par(mfrow = c(Ng, Ng))
#set.panel(4,4)
par(mar = c(3, 3, 2, 1))
for(i in 1:N_node){
  quilt.plot(data$lon, data$lat,prec_nodes[,i],main=paste("Node ",i,sep = ""),zlim=zr,add.legend=FALSE,nx=Nx, ny=Nx)

  US( add=TRUE, col="gray30", lwd=1)
}
mtext("Winter Seasonal Maximun Precipitation SOM Maps 3x3", outer = TRUE, side = 3, cex = 1.2, line = 1)
par( oma=c(1,0,0,1))
par(mar = c(0, 0, 0, 0))
image.plot(zlim=zr,legend.only=TRUE,legend.shrink=1,legend.width=3)

Display composited SST days for each node case: 3x3

# unload package kohonen that creates a conflict with map("world2",add=T)
detach(package:kohonen)
# Display composited SST days for each node case: 3x3
SST_nodes=as.data.frame(array(NaN, dim = c(dim(sstannavg)[2],N_node)))
for(i in 1:N_node){
  index=which(SOM$unit.classif==i)
  tmp_df=sstannavg[index,]
  if(length(index)>1){
    SST_nodes[,i]=colMeans(tmp_df,na.rm=T)
  }else{
    SST_nodes[,i]=tmp_df
  }
}

zr=range(SST_nodes)
Nx=50
par( oma=c(0,0,2.5,1)) # save some room for the legend
par(mfrow = c(Ng, Ng))
#set.panel(4,4)
par(mar = c(2, 3, 2, 1))
for(i in 1:N_node){
  zfull = rep(NaN,nglobe)   #also equal 72*36
  zfull[indexgrid]=SST_nodes[,i]
  zmat = matrix(zfull,nrow=nrows,ncol=ncols)
  image.plot(lon,lat,zmat,ylim=range(-40,70),lim=zr,main = paste("Node ",i,sep=""))
  contour(lon,lat,(zmat),ylim=range(-40,70),add=TRUE,nlev=6,lwd=1)
  map("world2",add=T)
}
mtext("Composite SST SOM Maps 3x3", outer = TRUE, side = 3, cex = 1.2, line = 1)

Comments about this problem are in the PDF file