Clustering with relational constraint / Hierarchical clustering / Tolerant

Clustering

Tolerant / Example
https://raw.githubusercontent.com/bavla/cluRC/master/SomeTyW.net

wdir = "C:/Users/batagelj/work/Python/graph/Nets/relCon"
gdir = "c:/users/batagelj/work/python/graph/Nets"
 
import sys, os, re, datetime, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from TQ import *
from Nets import Network as N
import numpy as np
from copy import copy, deepcopy
 
def printNet(net,nam,active):
  for e in net.links():
    u = net.initNode(e); v = net.termNode(e)
    w = net.getLink(e,'w',null=float('inf'))
    print(e,":",u,v,nam[u-1],nam[v-1],w)
 
def tolerant(net,method="max"):
  global m
  infinity = float('inf')  
  def ordNode(S): return [ int(a) for a in S ]
  def orDendro(i):
    global m
    if i<0: return [-i]
    else: return orDendro(m[i-1,0])+orDendro(m[i-1,1])
  G = deepcopy(net); G.delLoops() 
  num = len(net._nodes); numm = num-1; nam = [""]*num
  for i in range(num):
    nam[i] = net._nodes[i+1][3]['lab']
  active = set(range(num)); m = np.zeros((numm,2),dtype=int) 
  node = np.zeros(num,dtype=int); h = np.zeros(numm)
  w=np.empty(num); w.fill(1)
  for k in range(numm):
    printNet(G,nam,active)
  # determine the closest pair of clusters (p,q)
    numA = len(active); dmin = infinity; bctive = deepcopy(active)
    print(list(G.links()))
    for e in G.links():
      u,v,isArc,r,W = G._links[e]; we = W['w']
      if we < dmin: dmin = we; p,q,em = u-1,v-1,e
    if dmin == infinity: break
    G.delLink(em)
  # join the closest pair of clusters
    h[k] = dmin; active.discard(q)
    m[k][0] = -(p+1) if node[p]==0 else node[p]
    m[k][1] = -(q+1) if node[q]==0 else node[q]
    print(k+1," p,q:",p,q,"m:",m[k][0],m[k][1],"dmin=",dmin,">",active)   
  # update the network
    u = p+1; v = q+1
    for e in list(G.star(v)):
      print("e:",e,">",G._links[e]) 
      s = G.twin(v,e); we = G.getLink(e,'w',null=infinity)
      G.delLink(e)
    # determine dissimilarities to the new cluster
      if s != u:
        if not G.linked(u,s): G.addArc(u,s,w={'w':we+0})
        else:
          if G.arcTo(u,s): f = G._nodes[u][2][s][0]
          else: f = G._nodes[u][1][s][0]
          wf = G.getLink(f,'w',null=infinity)
          if method=="max": D = max(we,wf)
          elif method=="min": D = min(we,wf)
          elif method=="ward":
            ww = w[p]+w[q]+w[s-1]
            D = ((w[q]+w[s-1])*we + (w[p]+w[s-1])*wf - w[s-1]*h[k])/ww
          else: print('unknown method',method,'\n'); return(None)
          G.setLink(f,'w',D+0)
    node[p] = k+1; w[p] = w[p]+w[q];
  return {'proc':"hclusRC", 'merge':m.tolist(), 'height':h.tolist(),
        'order': ordNode(orDendro(len(m))), 'labels': nam,
        'method':'tolerant', 'call':None, 'dist.method':method }
 
net = N.loadPajek("SomeTyW.net")
net.Info()
num = len(net._nodes); nam = [""]*num
for i in range(num): nam[i] = net._nodes[i+1][3]['lab']
 
rez = tolerant(net,method="min")
js = open("SomeTyTol.json",'w'); json.dump(rez, js, indent=1); js.close()
> wdir <- "C:/Users/batagelj/work/Python/graph/Nets/relCon"
> setwd(wdir)
> library(jsonlite)
> js <- "SomeTyTol.json"
> R <- fromJSON(js)
> attr(R,"class") <- "hclust"
> plot(R,hang=-1)

TO DO:

  • additional strategies
  • nonconnected solution
  • speed-up heap dictionary
  • isArc
vlado/work/alg/rcht.txt · Last modified: 2020/07/27 16:51 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki