In [1]:
library(tidyverse)
library(readxl)
library(FactoMineR)
library(factoextra)
library(tidyverse)
library(readxl)
library(FactoMineR)
library(factoextra)
library('cluster')
library('ggplot2')
library('fields')

options(warn=-1)
-- Attaching packages --------------------------------------- tidyverse 1.3.1 --

v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.4     v dplyr   1.0.7
v tidyr   1.1.3     v stringr 1.4.0
v readr   2.0.1     v forcats 0.5.1

-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Loading required package: spam

Loading required package: dotCall64

Loading required package: grid

Spam version 2.7-0 (2021-06-25) is loaded.
Type 'help( Spam)' or 'demo( spam)' for a short introduction 
and overview of this package.
Help for individual functions is also obtained by adding the
suffix '.spam' to the function name, e.g. 'help( chol.spam)'.


Attaching package: 'spam'


The following objects are masked from 'package:base':

    backsolve, forwardsolve


Loading required package: viridis

Loading required package: viridisLite

See https://github.com/NCAR/Fields for
 an extensive vignette, other supplements and source code 

In [2]:
library(ggplot2)
library(cluster)      # To perform clustering of the Extremes
#library(mclust)
In [3]:
set.seed(1)
In [4]:
Win_df = read.table("Max_Winter_Seas_Prec.txt", header = T)

#Summer (Jun-Sep)
#Sum_df = read.table("Max_summer_Seas_Prec.txt", header = T)

#this is summer data/ need for spatial elements
Precep_df = read.table("Precipitaton_data.txt", header = T)


Elev_grid = read.table('Elevation_grid_1deg (1).txt',header = T)

merge_data <- function(df1, df2) {
  #Transpose data
  df1 = setNames(data.frame(t(df1[,-1])), df1[,1])

  #Create the mean column per location 
  df1['Mean_Rainfall'] = rowMeans(df1)
  
  #resetting the index so that location is a column rather than an index
  df1 <- cbind(Location = rownames(df1), df1)
  rownames(df1) <- 1:nrow(df1)
  
  # merge to have long/lat data
  df3 = merge(df1, df2, by="row.names", all.x=TRUE) 
  
  df3 <- df3[, -c(1, 3:57)]

  return(df3)
}

win_data = merge_data(Win_df, Precep_df)

y = win_data[,2]
X = win_data[,3:5]
X1 = win_data[,3:4]
datas=scale((X))
data1s=scale((X1))
In [5]:
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
In [6]:
# Determine number of clusters by iterations
Niter=50
WSS = matrix(nrow=Niter, ncol=15)
WSS1 = matrix(nrow=Niter, ncol=15)
for (j in 1:Niter) {
  WSS[j,1]= (nrow(datas)-1)*sum(apply(datas,2,var))#with elev
  WSS1[j,1]= (nrow(data1s)-1)*sum(apply(data1s,2,var))#without elev
  for (i in 2:15) {
  WSS[j,i] = sum(kmeans(datas, centers=i)$withinss)
  WSS1[j,i]= sum(kmeans(data1s, centers=i)$withinss)
  }}
wss=apply(WSS, 2, FUN = mean)
wss1=apply(WSS1, 2, FUN = mean)

P1=ggplot() +
  geom_line(mapping = aes(1:15, wss)) +
 geom_point(mapping = aes(1:15, wss), shape = 21, size = 3, color = "gray30", fill ="cadetblue1")+
 labs(title = "Considering elevation",x = "Number of Clusters",y = "Within groups sum of squares")+
  theme(plot.title = element_text(size = 15, face = "bold", hjust = 0.5))

P2=ggplot() +
  geom_line(mapping = aes(1:15, wss1)) +
 geom_point(mapping = aes(1:15, wss1), shape = 21, size = 3, color = "gray30", fill ="cadetblue1")+
 labs(title = "Without considering elevation",x = "Number of Clusters",y = "Within groups sum of squares")+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
multiplot(P1,P2, cols = 2)
In [7]:
## find the number of clusters for each case
#Considering elevation
slope=wss[2:length(wss)]-wss[1:(length(wss)-1)]
ind=which(slope== max(slope))#optimal number of clusters
#Without considering elevation
slope=wss1[2:length(wss1)]-wss1[1:(length(wss1)-1)]
ind1=which(slope== max(slope))#optimal number of clusters
N_cluster=c(ind,ind1); # numbers cluster considered column1: W\Elev; Column2:Wo\Elev
# K-Means Cluster Analysis
fit <- kmeans(datas, N_cluster[1]) #W\Elev
fit1 <- kmeans(data1s, N_cluster[2]) #Wo\Elev
# get cluster means 
means=aggregate(x = X,by=list(fit$cluster),FUN=mean)
means1=aggregate(x = X1,by=list(fit1$cluster),FUN=mean)
# append cluster assignment
Cluster=factor(fit$cluster)
data_1 = data.frame(X,Cluster) 
Cluster=factor(fit1$cluster)
data1_1 = data.frame(X1,Cluster)
In [8]:
p_map = ggplot(data_1)+
  geom_point(aes(Lon,Lat,color=factor(Cluster)))+
  labs(title = paste("Clustering With Elev"),
       x = "Longitude", y = "Latitude")+

  #scale_color_manual('Cluster',values=(Cluster))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)
In [9]:
p_map = ggplot(data1_1)+
  geom_point(aes(Lon,Lat,color=factor(Cluster)))+
  labs(title = paste("Clustering Without Elev"),
       x = "Longitude", y = "Latitude")+

  #scale_color_manual('Cluster',values=(Cluster))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)
In [10]:
fviz_cluster(fit, data = X, geom = c("point"),ellipse.type = "euclid")

fviz_cluster(fit1, data = X1, geom = c("point"),ellipse.type = "euclid")
Too few points to calculate an ellipse

Too few points to calculate an ellipse

Too few points to calculate an ellipse

Too few points to calculate an ellipse

Too few points to calculate an ellipse

Too few points to calculate an ellipse

Too few points to calculate an ellipse

Too few points to calculate an ellipse

In [11]:
N_cluster=c(5,4); # numbers cluster considered column1: W\Elev; Column2:Wo\Elev
# K-Means Cluster Analysis
fit <- kmeans(datas, N_cluster[1]) #W\Elev
fit1 <- kmeans(data1s, N_cluster[2]) #Wo\Elev
# get cluster means 
means=aggregate(X,by=list(fit$cluster),FUN=mean)
means1=aggregate(X1,by=list(fit1$cluster),FUN=mean)
# append cluster assignment
Cluster=factor(fit$cluster)
data_6 = data.frame(X,Cluster) 
Cluster=factor(fit1$cluster)
data1_6 = data.frame(X1,Cluster)
In [12]:
p_map = ggplot(data_6)+
  geom_point(aes(Lon,Lat,color=factor(Cluster)))+
  labs(title = paste("Clustering Without Elev"),
       x = "Longitude", y = "Latitude")+

  #scale_color_manual('Cluster',values=(Cluster))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)

p_map = ggplot(data1_6)+
  geom_point(aes(Lon,Lat,color=factor(Cluster)))+
  labs(title = paste("Clustering Without Elev"),
       x = "Longitude", y = "Latitude")+

  #scale_color_manual('Cluster',values=(Cluster))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)
In [13]:
fviz_cluster(fit, data = X, geom = c("point"),ellipse.type = "euclid")

fviz_cluster(fit1, data = X1, geom = c("point"),ellipse.type = "euclid")
In [15]:
N_cluster=c(3,3); # numbers cluster considered column1: W\Elev; Column2:Wo\Elev
# K-Means Cluster Analysis
fit <- kmeans(datas, N_cluster[1]) #W\Elev
fit1 <- kmeans(data1s, N_cluster[2]) #Wo\Elev
# get cluster means 
means=aggregate(X,by=list(fit$cluster),FUN=mean)
means1=aggregate(X1,by=list(fit1$cluster),FUN=mean)
# append cluster assignment
Cluster=factor(fit$cluster)
data_6 = data.frame(X,Cluster) 
Cluster=factor(fit1$cluster)
data1_6 = data.frame(X1,Cluster) 

p_map = ggplot(data_6)+
  geom_point(aes(Lon,Lat,color=factor(Cluster)))+
  labs(title = paste("Clustering Without Elev"),
       x = "Longitude", y = "Latitude")+

  #scale_color_manual('Cluster',values=(Cluster))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)

p_map = ggplot(data1_6)+
  geom_point(aes(Lon,Lat,color=factor(Cluster)))+
  labs(title = paste("Clustering Without Elev"),
       x = "Longitude", y = "Latitude")+

  #scale_color_manual('Cluster',values=(Cluster))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)
In [14]:
seeds_df_sc <- as.data.frame(scale(datas))
dist_mat <- dist(seeds_df_sc, method = 'euclidean')
hclust_avg <- hclust(dist_mat)
plot(hclust_avg)
rect.hclust(hclust_avg, k = 4, border = 2:5)