====== Clustering with relational constraint / Hierarchical clustering / Tolerant ======
[[vlado:work:clu|Clustering]]
[[pro:relc:tolex|Tolerant / Example]]\\
https://raw.githubusercontent.com/bavla/cluRC/master/SomeTyW.net
wdir = "C:/Users/batagelj/work/Python/graph/Nets/relCon"
gdir = "c:/users/batagelj/work/python/graph/Nets"
import sys, os, re, datetime, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from TQ import *
from Nets import Network as N
import numpy as np
from copy import copy, deepcopy
def printNet(net,nam,active):
for e in net.links():
u = net.initNode(e); v = net.termNode(e)
w = net.getLink(e,'w',null=float('inf'))
print(e,":",u,v,nam[u-1],nam[v-1],w)
def tolerant(net,method="max"):
global m
infinity = float('inf')
def ordNode(S): return [ int(a) for a in S ]
def orDendro(i):
global m
if i<0: return [-i]
else: return orDendro(m[i-1,0])+orDendro(m[i-1,1])
G = deepcopy(net); G.delLoops()
num = len(net._nodes); numm = num-1; nam = [""]*num
for i in range(num):
nam[i] = net._nodes[i+1][3]['lab']
active = set(range(num)); m = np.zeros((numm,2),dtype=int)
node = np.zeros(num,dtype=int); h = np.zeros(numm)
w=np.empty(num); w.fill(1)
for k in range(numm):
printNet(G,nam,active)
# determine the closest pair of clusters (p,q)
numA = len(active); dmin = infinity; bctive = deepcopy(active)
print(list(G.links()))
for e in G.links():
u,v,isArc,r,W = G._links[e]; we = W['w']
if we < dmin: dmin = we; p,q,em = u-1,v-1,e
if dmin == infinity: break
G.delLink(em)
# join the closest pair of clusters
h[k] = dmin; active.discard(q)
m[k][0] = -(p+1) if node[p]==0 else node[p]
m[k][1] = -(q+1) if node[q]==0 else node[q]
print(k+1," p,q:",p,q,"m:",m[k][0],m[k][1],"dmin=",dmin,">",active)
# update the network
u = p+1; v = q+1
for e in list(G.star(v)):
print("e:",e,">",G._links[e])
s = G.twin(v,e); we = G.getLink(e,'w',null=infinity)
G.delLink(e)
# determine dissimilarities to the new cluster
if s != u:
if not G.linked(u,s): G.addArc(u,s,w={'w':we+0})
else:
if G.arcTo(u,s): f = G._nodes[u][2][s][0]
else: f = G._nodes[u][1][s][0]
wf = G.getLink(f,'w',null=infinity)
if method=="max": D = max(we,wf)
elif method=="min": D = min(we,wf)
elif method=="ward":
ww = w[p]+w[q]+w[s-1]
D = ((w[q]+w[s-1])*we + (w[p]+w[s-1])*wf - w[s-1]*h[k])/ww
else: print('unknown method',method,'\n'); return(None)
G.setLink(f,'w',D+0)
node[p] = k+1; w[p] = w[p]+w[q];
return {'proc':"hclusRC", 'merge':m.tolist(), 'height':h.tolist(),
'order': ordNode(orDendro(len(m))), 'labels': nam,
'method':'tolerant', 'call':None, 'dist.method':method }
net = N.loadPajek("SomeTyW.net")
net.Info()
num = len(net._nodes); nam = [""]*num
for i in range(num): nam[i] = net._nodes[i+1][3]['lab']
rez = tolerant(net,method="min")
js = open("SomeTyTol.json",'w'); json.dump(rez, js, indent=1); js.close()
> wdir <- "C:/Users/batagelj/work/Python/graph/Nets/relCon"
> setwd(wdir)
> library(jsonlite)
> js <- "SomeTyTol.json"
> R <- fromJSON(js)
> attr(R,"class") <- "hclust"
> plot(R,hang=-1)
TO DO:
* additional strategies
* nonconnected solution
* speed-up heap dictionary
* isArc
*