Clustering in R / dissimilarity matrix
# adapted Ward's hierarchical clustering #--------------------------------------------- # VB, 16. July 2010 Clamix and R # Python version: VB, 15. July 2020 wdir = "C:/Users/batagelj/work/Python/graph/Nets/relCon" gdir = "c:/users/batagelj/work/python/graph/Nets" import sys, os, re, datetime, json sys.path = [gdir]+sys.path; os.chdir(wdir) from TQ import * from Nets import Network as N import numpy as np from copy import copy, deepcopy def hclus(Dis,method="max"): global m def ordNode(S): return [ int(a) for a in S ] infinity = float('inf') def orDendro(i): global m if i<0: return [-i] else: return orDendro(m[i-1,0])+orDendro(m[i-1,1]) D = Dis['dis'] num = len(D); numm = num-1 for i in range(num): D[i][i] = infinity active = set(range(num)); m = np.zeros((numm,2),dtype=int) node = np.zeros(num,dtype=int); h = np.zeros(numm) w=np.empty(num); w.fill(1) for k in range(numm): # determine the closest pair of clusters (p,q) numA = len(active); dmin = infinity; bctive = deepcopy(active) for a in active: bctive.remove(a) for b in bctive: if D[a][b] < dmin: dmin = D[a][b]; p,q = a,b # join the closest pair of clusters h[k] = dmin; active.discard(p) if node[p]==0: m[k][0] = -(p+1) else: m[k][0] = node[p] if node[q]==0: m[k][1] = -(q+1) else: m[k][1] = node[q] # determine dissimilarities to the new cluster for s in active: if s != q: if method=="max": D[q][s] = max(D[q][s],D[p][s]) elif method=="min": D[q][s] = min(D[q][s],D[p][s]) elif method=="ward": ww = w[p]+w[q]+w[s] D[q][s] = ((w[q]+w[s])*D[q][s] + (w[p]+w[s])*D[p][s] - w[s]*h[k])/ww else: print('unknown method',method,'\n'); return(None) D[s][q] = D[q][s] node[q] = k+1; w[q] = w[q]+w[p]; return {'proc':"hclusDi", 'merge':m.tolist(), 'height':h.tolist(), 'order': ordNode(orDendro(len(m))), 'labels': Dis['nam'], 'method':"hclus", 'call':None, 'dist.method':method } with open("SomeTy.dis",'r') as f: h = f.readline() dis = [[int(num) for num in line.strip().split(' ')] for line in f if line.strip() != ""] nam = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j' ] Dis = { 'nam': nam, 'dis': dis } r = hclus(Dis,method='ward') js = open("SomeTyTest.json",'w'); json.dump(r, js, indent=1); js.close()
> wdir <- "C:/Users/batagelj/work/Python/graph/Nets/relCon" > setwd(wdir) > library(jsonlite) > js <- "SomeTyTest.json" > Rw <- fromJSON(js) > attr(Rw,"class") <- "hclust" > mt <- paste("Cluster Dendrogram -",Rw$dist.method) > plot(Rw,hang=-1,main=mt)