library(tidyverse)
library(readxl)
library(FactoMineR)
library(factoextra)
library('cluster')
library('ggplot2')
library('fields')
library(ggplot2)
library(cluster) # To perform clustering of the Extremes
#library(mclust)
Win_df = read.table("Max_Winter_Seas_Prec.txt", header = T)
#Summer (Jun-Sep)
#Sum_df = read.table("Max_summer_Seas_Prec.txt", header = T)
#this is summer data/ need for spatial elements
Precep_df = read.table("Precipitaton_data.txt", header = T)
Elev_grid = read.table('Elevation_grid_1deg (1).txt',header = T)
merge_data <- function(df1, df2) {
#Transpose data
df1 = setNames(data.frame(t(df1[,-1])), df1[,1])
#Create the mean column per location
df1['Mean_Rainfall'] = rowMeans(df1)
#resetting the index so that location is a column rather than an index
df1 <- cbind(Location = rownames(df1), df1)
rownames(df1) <- 1:nrow(df1)
# merge to have long/lat data
df3 = merge(df1, df2, by="row.names", all.x=TRUE)
df3 <- df3[, -c(1, 3:57)]
return(df3)
}
win_data = merge_data(Win_df, Precep_df)
y = win_data[,2]
X = win_data[,3:6]
X$station = win_data[,1]
X1 = win_data[,3:4]
#datas=scale((X))
#data1s=scale((X1))
pam_fmado_ll = function (x, k, ll) {
# x - Design matrix, typically colums are stations, rows are time
# k - Number of clusters
# ll - two column matrix of lat long points (or preferably projected) with N rows
N = ncol(x) # number of stations
T = nrow(x) # number of time points
# compute the F-madogram distance
V = array(NaN, dim = c(T, N))
for (p in 1:N) {
x.vec = as.vector(x[, p])
Femp = ecdf(x.vec)(x.vec)
V[, p] = Femp
}
print(N)
DD = dist(t(V), method = "manhattan", diag = TRUE, upper = TRUE)/(2 * T)
print(DD)
# weight by physical distance
DDll = dist(ll,method='manhattan')
#print(DDll)
DDw = as.matrix(DD) + t(t(as.matrix(DDll))/apply(as.matrix(DDll),2,max))*max(as.matrix(DD))
# do the clustering
output = pam(DDw, k, diss = TRUE, medoids = NULL)
return(output)
}
Pm = Win_df[2:74]
grid = win_data[,3:4]
ks = 2:20
# Do the clustering for all k values
cluster_list = list()
for(k in ks){
#message('Trying ',k,' clusters')
cluster_list[[which(ks == k)]] = pam_fmado_ll(Pm,k,grid)
}
# find the average silhouette value (smaller is better)
sil = sapply(cluster_list,function(x)x$silinfo$avg.width)
ggplot() +
geom_line(mapping = aes(ks,sil)) +
geom_point(mapping = aes(ks,sil), shape = 21, size = 3, color = "gray30", fill ="cadetblue1") +
labs(title = "Silhouette width",x = 'Number of clusters',y = 'Silhouette width')+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
# find the optimal k
optimal_k = ks[which.max(sil)]
message('The optimal number of clusters is: ',optimal_k)
The optimal number of clusters is: 3
## for optimal number of clusters
clusters = cbind(grid,cluster=cluster_list[[which(ks==optimal_k)]]$clustering)
p_map = ggplot(clusters)+
geom_point(aes(Lon,Lat,color=factor(cluster)))+
labs(title = paste("Clustering of the Extremes spatial map with K=",optimal_k," (optimal)",sep = ""),
x = "Longitude", y = "Latitude")+
scale_color_manual('Cluster',values=tim.colors(optimal_k))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
## for optimal number of clusters
clusters = cbind(grid,cluster=cluster_list[[which(ks==optimal_k)]]$clustering)
p_map = ggplot(clusters)+
geom_point(aes(Lon,Lat,color=factor(cluster)))+
labs(title = paste("Clustering of the Extremes spatial map with K=",optimal_k," (optimal)",sep = ""),
x = "Longitude", y = "Latitude")+
scale_color_manual('Cluster',values=tim.colors(optimal_k))+
coord_quickmap()+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
clusters = cbind(grid,cluster=cluster_list[[which(ks==5)]]$clustering)
p_map1 = ggplot(clusters)+
ggtitle("Clustering of the Extremes spatial map with K=5") +
geom_point(aes(Lon,Lat,color=factor(cluster)))+
scale_color_manual('Cluster',values=tim.colors(8))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map1)