====== Clustering with relational constraint / Hierarchical clustering / Tolerant ====== [[vlado:work:clu|Clustering]] [[pro:relc:tolex|Tolerant / Example]]\\ https://raw.githubusercontent.com/bavla/cluRC/master/SomeTyW.net wdir = "C:/Users/batagelj/work/Python/graph/Nets/relCon" gdir = "c:/users/batagelj/work/python/graph/Nets" import sys, os, re, datetime, json sys.path = [gdir]+sys.path; os.chdir(wdir) from TQ import * from Nets import Network as N import numpy as np from copy import copy, deepcopy def printNet(net,nam,active): for e in net.links(): u = net.initNode(e); v = net.termNode(e) w = net.getLink(e,'w',null=float('inf')) print(e,":",u,v,nam[u-1],nam[v-1],w) def tolerant(net,method="max"): global m infinity = float('inf') def ordNode(S): return [ int(a) for a in S ] def orDendro(i): global m if i<0: return [-i] else: return orDendro(m[i-1,0])+orDendro(m[i-1,1]) G = deepcopy(net); G.delLoops() num = len(net._nodes); numm = num-1; nam = [""]*num for i in range(num): nam[i] = net._nodes[i+1][3]['lab'] active = set(range(num)); m = np.zeros((numm,2),dtype=int) node = np.zeros(num,dtype=int); h = np.zeros(numm) w=np.empty(num); w.fill(1) for k in range(numm): printNet(G,nam,active) # determine the closest pair of clusters (p,q) numA = len(active); dmin = infinity; bctive = deepcopy(active) print(list(G.links())) for e in G.links(): u,v,isArc,r,W = G._links[e]; we = W['w'] if we < dmin: dmin = we; p,q,em = u-1,v-1,e if dmin == infinity: break G.delLink(em) # join the closest pair of clusters h[k] = dmin; active.discard(q) m[k][0] = -(p+1) if node[p]==0 else node[p] m[k][1] = -(q+1) if node[q]==0 else node[q] print(k+1," p,q:",p,q,"m:",m[k][0],m[k][1],"dmin=",dmin,">",active) # update the network u = p+1; v = q+1 for e in list(G.star(v)): print("e:",e,">",G._links[e]) s = G.twin(v,e); we = G.getLink(e,'w',null=infinity) G.delLink(e) # determine dissimilarities to the new cluster if s != u: if not G.linked(u,s): G.addArc(u,s,w={'w':we+0}) else: if G.arcTo(u,s): f = G._nodes[u][2][s][0] else: f = G._nodes[u][1][s][0] wf = G.getLink(f,'w',null=infinity) if method=="max": D = max(we,wf) elif method=="min": D = min(we,wf) elif method=="ward": ww = w[p]+w[q]+w[s-1] D = ((w[q]+w[s-1])*we + (w[p]+w[s-1])*wf - w[s-1]*h[k])/ww else: print('unknown method',method,'\n'); return(None) G.setLink(f,'w',D+0) node[p] = k+1; w[p] = w[p]+w[q]; return {'proc':"hclusRC", 'merge':m.tolist(), 'height':h.tolist(), 'order': ordNode(orDendro(len(m))), 'labels': nam, 'method':'tolerant', 'call':None, 'dist.method':method } net = N.loadPajek("SomeTyW.net") net.Info() num = len(net._nodes); nam = [""]*num for i in range(num): nam[i] = net._nodes[i+1][3]['lab'] rez = tolerant(net,method="min") js = open("SomeTyTol.json",'w'); json.dump(rez, js, indent=1); js.close() > wdir <- "C:/Users/batagelj/work/Python/graph/Nets/relCon" > setwd(wdir) > library(jsonlite) > js <- "SomeTyTol.json" > R <- fromJSON(js) > attr(R,"class") <- "hclust" > plot(R,hang=-1) TO DO: * additional strategies * nonconnected solution * speed-up heap dictionary * isArc *