Hierarchical clustering / dissimilarity matrix

Clustering

Clustering in R / dissimilarity matrix

# adapted Ward's hierarchical clustering
#---------------------------------------------
# VB, 16. July 2010  Clamix and R
# Python version: VB, 15. July 2020
 
wdir = "C:/Users/batagelj/work/Python/graph/Nets/relCon"
gdir = "c:/users/batagelj/work/python/graph/Nets"
 
import sys, os, re, datetime, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from TQ import *
from Nets import Network as N
import numpy as np
from copy import copy, deepcopy
 
def hclus(Dis,method="max"):
  global m
  def ordNode(S): return [ int(a) for a in S ]
  infinity = float('inf')  
  def orDendro(i):
    global m
    if i<0: return [-i]
    else: return orDendro(m[i-1,0])+orDendro(m[i-1,1])
  D = Dis['dis']
  num = len(D); numm = num-1
  for i in range(num): D[i][i] = infinity
  active = set(range(num)); m = np.zeros((numm,2),dtype=int) 
  node = np.zeros(num,dtype=int); h = np.zeros(numm)
  w=np.empty(num); w.fill(1)
  for k in range(numm):
  # determine the closest pair of clusters (p,q)
    numA = len(active); dmin = infinity; bctive = deepcopy(active)
    for a in active:
      bctive.remove(a)
      for b in bctive:
        if D[a][b] < dmin: dmin = D[a][b]; p,q = a,b          
  # join the closest pair of clusters
    h[k] = dmin; active.discard(p)
    if node[p]==0: m[k][0] = -(p+1)
    else: m[k][0] = node[p]
    if node[q]==0: m[k][1] = -(q+1)
    else: m[k][1] = node[q]     
  # determine dissimilarities to the new cluster
    for s in active:
      if s != q:
        if method=="max": D[q][s] = max(D[q][s],D[p][s])
        elif method=="min": D[q][s] = min(D[q][s],D[p][s])
        elif method=="ward":
          ww = w[p]+w[q]+w[s]
          D[q][s] = ((w[q]+w[s])*D[q][s] + (w[p]+w[s])*D[p][s] - w[s]*h[k])/ww
        else: print('unknown method',method,'\n'); return(None)
        D[s][q] = D[q][s]
    node[q] = k+1; w[q] = w[q]+w[p];
  return {'proc':"hclusDi", 'merge':m.tolist(), 'height':h.tolist(),
        'order': ordNode(orDendro(len(m))), 'labels': Dis['nam'],
        'method':"hclus", 'call':None, 'dist.method':method }
 
with open("SomeTy.dis",'r') as f: 
   h = f.readline()
   dis = [[int(num) for num in line.strip().split(' ')] for line in f if line.strip() != ""]
nam = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j' ]
Dis = { 'nam': nam, 'dis': dis }
r = hclus(Dis,method='ward')
js = open("SomeTyTest.json",'w'); json.dump(r, js, indent=1); js.close()
> wdir <- "C:/Users/batagelj/work/Python/graph/Nets/relCon"
> setwd(wdir)
> library(jsonlite)
> js <- "SomeTyTest.json"
> Rw <- fromJSON(js)
> attr(Rw,"class") <- "hclust"
> mt <- paste("Cluster Dendrogram -",Rw$dist.method)
> plot(Rw,hang=-1,main=mt)

vlado/work/alg/hcdi.txt · Last modified: 2020/07/16 04:34 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki