In [ ]:
library(tidyverse)
library(readxl)
library(FactoMineR)
library(factoextra)
library('cluster')
library('ggplot2')
library('fields')
In [2]:
library(ggplot2)
library(cluster)      # To perform clustering of the Extremes
#library(mclust)
In [3]:
Win_df = read.table("Max_Winter_Seas_Prec.txt", header = T)

#Summer (Jun-Sep)
#Sum_df = read.table("Max_summer_Seas_Prec.txt", header = T)

#this is summer data/ need for spatial elements
Precep_df = read.table("Precipitaton_data.txt", header = T)


Elev_grid = read.table('Elevation_grid_1deg (1).txt',header = T)

merge_data <- function(df1, df2) {
  #Transpose data
  df1 = setNames(data.frame(t(df1[,-1])), df1[,1])

  #Create the mean column per location 
  df1['Mean_Rainfall'] = rowMeans(df1)
  
  #resetting the index so that location is a column rather than an index
  df1 <- cbind(Location = rownames(df1), df1)
  rownames(df1) <- 1:nrow(df1)
  
  # merge to have long/lat data
  df3 = merge(df1, df2, by="row.names", all.x=TRUE) 
  
  df3 <- df3[, -c(1, 3:57)]

  return(df3)
}

win_data = merge_data(Win_df, Precep_df)

y = win_data[,2]
X = win_data[,3:6]
X$station = win_data[,1]
X1 = win_data[,3:4]
#datas=scale((X))
#data1s=scale((X1))
In [4]:
pam_fmado_ll = function (x, k, ll) {
  
  # x - Design matrix, typically colums are stations, rows are time 
  # k - Number of clusters
  # ll - two column matrix of lat long points (or preferably projected) with N rows
  
  N = ncol(x) # number of stations 
  T = nrow(x) # number of time points
  
  # compute the F-madogram distance
  V = array(NaN, dim = c(T, N))
  for (p in 1:N) {
    x.vec = as.vector(x[, p])
    Femp = ecdf(x.vec)(x.vec)
    V[, p] = Femp
  }
  print(N)
  DD = dist(t(V), method = "manhattan", diag = TRUE, upper = TRUE)/(2 * T)
  print(DD)
  # weight by physical distance
  DDll = dist(ll,method='manhattan')
    
    #print(DDll)
  DDw = as.matrix(DD) + t(t(as.matrix(DDll))/apply(as.matrix(DDll),2,max))*max(as.matrix(DD))
  
  # do the clustering
  output = pam(DDw, k, diss = TRUE, medoids = NULL)
  return(output)
}
In [5]:
Pm = Win_df[2:74]

grid = win_data[,3:4]
In [ ]:
ks = 2:20

# Do the clustering for all k values
cluster_list = list()
for(k in ks){
  #message('Trying ',k,' clusters')
  cluster_list[[which(ks == k)]] = pam_fmado_ll(Pm,k,grid)
}

# find the average silhouette value (smaller is better)
sil = sapply(cluster_list,function(x)x$silinfo$avg.width)
In [7]:
ggplot() +
 geom_line(mapping = aes(ks,sil)) +
 geom_point(mapping = aes(ks,sil), shape = 21, size = 3, color = "gray30", fill ="cadetblue1") +
 labs(title = "Silhouette width",x = 'Number of clusters',y = 'Silhouette width')+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
In [8]:
# find the optimal k
optimal_k = ks[which.max(sil)]
message('The optimal number of clusters is: ',optimal_k)
The optimal number of clusters is: 3

In [9]:
## for optimal number of clusters 
clusters = cbind(grid,cluster=cluster_list[[which(ks==optimal_k)]]$clustering)
p_map = ggplot(clusters)+
  geom_point(aes(Lon,Lat,color=factor(cluster)))+
  labs(title = paste("Clustering of the Extremes spatial map with K=",optimal_k," (optimal)",sep = ""),
       x = "Longitude", y = "Latitude")+
  scale_color_manual('Cluster',values=tim.colors(optimal_k))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)
In [10]:
## for optimal number of clusters 
clusters = cbind(grid,cluster=cluster_list[[which(ks==optimal_k)]]$clustering)
p_map = ggplot(clusters)+
  geom_point(aes(Lon,Lat,color=factor(cluster)))+
  labs(title = paste("Clustering of the Extremes spatial map with K=",optimal_k," (optimal)",sep = ""),
       x = "Longitude", y = "Latitude")+
  scale_color_manual('Cluster',values=tim.colors(optimal_k))+
  coord_quickmap()+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map)
In [11]:
clusters = cbind(grid,cluster=cluster_list[[which(ks==5)]]$clustering)
p_map1 = ggplot(clusters)+
  ggtitle("Clustering of the Extremes spatial map with K=5") +
  geom_point(aes(Lon,Lat,color=factor(cluster)))+
  scale_color_manual('Cluster',values=tim.colors(8))+
  coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
  theme_minimal()+
  theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
  borders('state')
print(p_map1)