library(tidyverse)
library(readxl)
library(FactoMineR)
library(factoextra)
library(tidyverse)
library(readxl)
library(FactoMineR)
library(factoextra)
library('cluster')
library('ggplot2')
library('fields')
options(warn=-1)
-- Attaching packages --------------------------------------- tidyverse 1.3.1 -- v ggplot2 3.3.5 v purrr 0.3.4 v tibble 3.1.4 v dplyr 1.0.7 v tidyr 1.1.3 v stringr 1.4.0 v readr 2.0.1 v forcats 0.5.1 -- Conflicts ------------------------------------------ tidyverse_conflicts() -- x dplyr::filter() masks stats::filter() x dplyr::lag() masks stats::lag() Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa Loading required package: spam Loading required package: dotCall64 Loading required package: grid Spam version 2.7-0 (2021-06-25) is loaded. Type 'help( Spam)' or 'demo( spam)' for a short introduction and overview of this package. Help for individual functions is also obtained by adding the suffix '.spam' to the function name, e.g. 'help( chol.spam)'. Attaching package: 'spam' The following objects are masked from 'package:base': backsolve, forwardsolve Loading required package: viridis Loading required package: viridisLite See https://github.com/NCAR/Fields for an extensive vignette, other supplements and source code
library(ggplot2)
library(cluster) # To perform clustering of the Extremes
#library(mclust)
set.seed(1)
Win_df = read.table("Max_Winter_Seas_Prec.txt", header = T)
#Summer (Jun-Sep)
#Sum_df = read.table("Max_summer_Seas_Prec.txt", header = T)
#this is summer data/ need for spatial elements
Precep_df = read.table("Precipitaton_data.txt", header = T)
Elev_grid = read.table('Elevation_grid_1deg (1).txt',header = T)
merge_data <- function(df1, df2) {
#Transpose data
df1 = setNames(data.frame(t(df1[,-1])), df1[,1])
#Create the mean column per location
df1['Mean_Rainfall'] = rowMeans(df1)
#resetting the index so that location is a column rather than an index
df1 <- cbind(Location = rownames(df1), df1)
rownames(df1) <- 1:nrow(df1)
# merge to have long/lat data
df3 = merge(df1, df2, by="row.names", all.x=TRUE)
df3 <- df3[, -c(1, 3:57)]
return(df3)
}
win_data = merge_data(Win_df, Precep_df)
y = win_data[,2]
X = win_data[,3:5]
X1 = win_data[,3:4]
datas=scale((X))
data1s=scale((X1))
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
# Determine number of clusters by iterations
Niter=50
WSS = matrix(nrow=Niter, ncol=15)
WSS1 = matrix(nrow=Niter, ncol=15)
for (j in 1:Niter) {
WSS[j,1]= (nrow(datas)-1)*sum(apply(datas,2,var))#with elev
WSS1[j,1]= (nrow(data1s)-1)*sum(apply(data1s,2,var))#without elev
for (i in 2:15) {
WSS[j,i] = sum(kmeans(datas, centers=i)$withinss)
WSS1[j,i]= sum(kmeans(data1s, centers=i)$withinss)
}}
wss=apply(WSS, 2, FUN = mean)
wss1=apply(WSS1, 2, FUN = mean)
P1=ggplot() +
geom_line(mapping = aes(1:15, wss)) +
geom_point(mapping = aes(1:15, wss), shape = 21, size = 3, color = "gray30", fill ="cadetblue1")+
labs(title = "Considering elevation",x = "Number of Clusters",y = "Within groups sum of squares")+
theme(plot.title = element_text(size = 15, face = "bold", hjust = 0.5))
P2=ggplot() +
geom_line(mapping = aes(1:15, wss1)) +
geom_point(mapping = aes(1:15, wss1), shape = 21, size = 3, color = "gray30", fill ="cadetblue1")+
labs(title = "Without considering elevation",x = "Number of Clusters",y = "Within groups sum of squares")+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))
multiplot(P1,P2, cols = 2)
## find the number of clusters for each case
#Considering elevation
slope=wss[2:length(wss)]-wss[1:(length(wss)-1)]
ind=which(slope== max(slope))#optimal number of clusters
#Without considering elevation
slope=wss1[2:length(wss1)]-wss1[1:(length(wss1)-1)]
ind1=which(slope== max(slope))#optimal number of clusters
N_cluster=c(ind,ind1); # numbers cluster considered column1: W\Elev; Column2:Wo\Elev
# K-Means Cluster Analysis
fit <- kmeans(datas, N_cluster[1]) #W\Elev
fit1 <- kmeans(data1s, N_cluster[2]) #Wo\Elev
# get cluster means
means=aggregate(x = X,by=list(fit$cluster),FUN=mean)
means1=aggregate(x = X1,by=list(fit1$cluster),FUN=mean)
# append cluster assignment
Cluster=factor(fit$cluster)
data_1 = data.frame(X,Cluster)
Cluster=factor(fit1$cluster)
data1_1 = data.frame(X1,Cluster)
p_map = ggplot(data_1)+
geom_point(aes(Lon,Lat,color=factor(Cluster)))+
labs(title = paste("Clustering With Elev"),
x = "Longitude", y = "Latitude")+
#scale_color_manual('Cluster',values=(Cluster))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
p_map = ggplot(data1_1)+
geom_point(aes(Lon,Lat,color=factor(Cluster)))+
labs(title = paste("Clustering Without Elev"),
x = "Longitude", y = "Latitude")+
#scale_color_manual('Cluster',values=(Cluster))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
fviz_cluster(fit, data = X, geom = c("point"),ellipse.type = "euclid")
fviz_cluster(fit1, data = X1, geom = c("point"),ellipse.type = "euclid")
Too few points to calculate an ellipse Too few points to calculate an ellipse Too few points to calculate an ellipse Too few points to calculate an ellipse Too few points to calculate an ellipse Too few points to calculate an ellipse Too few points to calculate an ellipse Too few points to calculate an ellipse
N_cluster=c(5,4); # numbers cluster considered column1: W\Elev; Column2:Wo\Elev
# K-Means Cluster Analysis
fit <- kmeans(datas, N_cluster[1]) #W\Elev
fit1 <- kmeans(data1s, N_cluster[2]) #Wo\Elev
# get cluster means
means=aggregate(X,by=list(fit$cluster),FUN=mean)
means1=aggregate(X1,by=list(fit1$cluster),FUN=mean)
# append cluster assignment
Cluster=factor(fit$cluster)
data_6 = data.frame(X,Cluster)
Cluster=factor(fit1$cluster)
data1_6 = data.frame(X1,Cluster)
p_map = ggplot(data_6)+
geom_point(aes(Lon,Lat,color=factor(Cluster)))+
labs(title = paste("Clustering Without Elev"),
x = "Longitude", y = "Latitude")+
#scale_color_manual('Cluster',values=(Cluster))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
p_map = ggplot(data1_6)+
geom_point(aes(Lon,Lat,color=factor(Cluster)))+
labs(title = paste("Clustering Without Elev"),
x = "Longitude", y = "Latitude")+
#scale_color_manual('Cluster',values=(Cluster))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
fviz_cluster(fit, data = X, geom = c("point"),ellipse.type = "euclid")
fviz_cluster(fit1, data = X1, geom = c("point"),ellipse.type = "euclid")
N_cluster=c(3,3); # numbers cluster considered column1: W\Elev; Column2:Wo\Elev
# K-Means Cluster Analysis
fit <- kmeans(datas, N_cluster[1]) #W\Elev
fit1 <- kmeans(data1s, N_cluster[2]) #Wo\Elev
# get cluster means
means=aggregate(X,by=list(fit$cluster),FUN=mean)
means1=aggregate(X1,by=list(fit1$cluster),FUN=mean)
# append cluster assignment
Cluster=factor(fit$cluster)
data_6 = data.frame(X,Cluster)
Cluster=factor(fit1$cluster)
data1_6 = data.frame(X1,Cluster)
p_map = ggplot(data_6)+
geom_point(aes(Lon,Lat,color=factor(Cluster)))+
labs(title = paste("Clustering Without Elev"),
x = "Longitude", y = "Latitude")+
#scale_color_manual('Cluster',values=(Cluster))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
p_map = ggplot(data1_6)+
geom_point(aes(Lon,Lat,color=factor(Cluster)))+
labs(title = paste("Clustering Without Elev"),
x = "Longitude", y = "Latitude")+
#scale_color_manual('Cluster',values=(Cluster))+
coord_quickmap(xlim=c(-102.2,-114.1),ylim=c(30,42))+
theme_minimal()+
theme(plot.title = element_text(size = 13, face = "bold", hjust = 0.5))+
borders('state')
print(p_map)
seeds_df_sc <- as.data.frame(scale(datas))
dist_mat <- dist(seeds_df_sc, method = 'euclidean')
hclust_avg <- hclust(dist_mat)
plot(hclust_avg)
rect.hclust(hclust_avg, k = 4, border = 2:5)