library('cluster') library('ggplot2') library('fields') #source('lib.R') source("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/HW2-sst-monsoon/extreme_clustering_example/lib.R") ks = 2:20 # read in data, can be a matrix or a data.frame #precip_df = read.csv('spring_precip.csv') precip_df=read.csv("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/HW2-sst-monsoon/extreme_clustering_example/spring_precip.csv") #ll_df = read.csv('lat_lon.csv') ll_df = read.csv("http://civil.colorado.edu/~balajir/CVEN6833/R-sessions/session2/HW2-sst-monsoon/extreme_clustering_example/lat_lon.csv") # Do the clustering for all k values cluster_list = list() for(k in ks){ message('Trying ',k,' clusters') cluster_list[[which(ks == k)]] = pam_fmado_ll(precip_df,k,ll_df) } # find the average silhouette value (smaller is better) sil = sapply(cluster_list,function(x)x$silinfo$avg.width) # plot the ave silhouette value versus k, look for a maximum p_sil = qplot(ks,sil)+ geom_abline(aes(intercept=min(sil)),slope=0)+ xlab('Number of clusters')+ ylab('Silhouette')+ theme_minimal() print(p_sil) # find the optimal k optimal_k = ks[which.max(sil)] message('The optimal number of clusters is: ',optimal_k) # plot the clusters spatially dev.new() clusters = cbind(ll_df,cluster=cluster_list[[which(ks==optimal_k)]]$clustering) p_map = ggplot(clusters)+ geom_point(aes(lon,lat,color=factor(cluster)))+ scale_color_manual('Cluster',values=tim.colors(optimal_k))+ theme_minimal()+ borders('state')+ coord_quickmap(xlim=c(-125,-105),ylim=c(31,50)) print(p_map)