The modul netsBM
implements generalized blockmodeling procedure for simple binary (the link weights are not considered) networks based on network analysis package Nets
.
To speed-up the traditional procedure the local optimization is limited to the end-nodes of links belonging to different clusters.
This is an improvement of the first version of NetsBM
:
wdir = "C:/Users/batagelj/work/projects/BM/py" gdir = "c:/users/batagelj/work/python/graph/Nets" ddir = "C:/Users/batagelj/work/projects/BM/data" import sys, os, re, json sys.path = [gdir]+sys.path; os.chdir(wdir) from Nets import Network as N import numpy as np from copy import copy, deepcopy from timeit import default_timer as timer from datetime import datetime def savePajekPar(L,file): clu = open(file,'w'); n=len(L) clu.write('*vertices '+str(n)+'\n') for i in range(n): clu.write(str(L[i])+'\n') clu.close() def normPar(L): I = -np.ones(1+max(L),dtype=int) N = np.zeros(len(L),dtype=int); k = -1 for i,j in enumerate(L): if I[j]<0: k += 1; I[j] = k N[i] = I[j] return N def P(net,C,Types): n = net._info['nNodes']; nC = Types.shape[0]; size = np.zeros(nC,dtype=int) for i in C: size[i] += 1 dCnt = np.zeros(nC); Cnt = np.zeros((nC,nC)) # Sum = np.zeros((nC,nC)); rSum = np.zeros((nC,n)); cSum = np.zeros((nC,n)) rCnt = np.zeros((nC,n)); cCnt = np.zeros((nC,n)) rPos = np.zeros((nC,nC)); cPos = np.zeros((nC,nC)) error = np.zeros((nC,nC)); btype = np.zeros((nC,nC),dtype=int) for a in net.links(): u,v,d,r,W = net.link(a); w = W['w']; p = u-1; q = v-1 Cnt[C[p],C[q]] += 1; rCnt[C[q],p] += 1; cCnt[C[p],q] += 1 if not d: Cnt[C[q],C[p]] += 1; rCnt[C[p],q] += 1; cCnt[C[q],p] += 1 if p==q: dCnt[C[p]] += 1 rB = (rCnt>0).astype(int); cB = (cCnt>0).astype(int) for i in range(nC): for s in range(n): rPos[C[s],i] += rB[i,s]; cPos[i,C[s]] += cB[i,s] for i in range(nC): for j in range(nC): err = np.inf; typ = XXX if NUL in Types[i,j]: ert = Cnt[i,j] if i==j: ert += min(0,size[i]-2*dCnt[i]) if ert < err: err = ert; typ = NUL if COM in Types[i,j]: ert = size[i]*size[j] - Cnt[i,j] if i==j: ert += min(0,2*dCnt[i]-size[i]) if ert < err: err = ert; typ = COM if REG in Types[i,j]: ert = (size[j]-cPos[i,j])*size[i] +\ (size[i]-rPos[i,j])*cPos[i,j] if ert < err: err = ert; typ = REG if RRE in Types[i,j]: ert = (size[i]-rPos[i,j])*size[j] if ert < err: err = ert; typ = RRE if CRE in Types[i,j]: ert = (size[j]-cPos[i,j])*size[i] if ert < err: err = ert; typ = DEN if DEN in Types[i,j]: ert = max(int(dProp*size[i]*(size[i]-1))-Cnt[i,j]+dCnt[i],0) if i==j \ else max(int(dProp*size[i]*size[j])-Cnt[i,j],0) error[i,j] = err; btype[i,j] = typ return (np.sum(error), error, btype) def optimize(net,C,Types,minSize): nC = Types.shape[0]; size = np.zeros(nC,dtype=int) for i in C: size[i] += 1 if np.min(size)<minSize: return(C,np.inf,None,None) found = True; Clu = np.array(C,dtype=int); Cm = Clu.copy() Err, Q, T = P(net,Clu,Types) while found: found = False for a in net.links(): u,v,d,r,W = net.link(a); w = W['w'] p = u-1; q = v-1; Cp = Clu[p]; Cq = Clu[q] if not(Cp==Cq): Clu[p] = Cq; size[Cp] += -1; size[Cq] += 1 if size[Cp]<minSize: err = np.inf else: err, Q, T = P(net,Clu,Types) if err<Err: Cm = Clu.copy(); Err = err; found = True else: Clu[p] = Cp; Clu[q] = Cp; size[Cp] += 2; size[Cq] += -2 if size[Cq]<minSize: err = np.inf else: err, Q, T = P(net,Clu,Types) if err<Err: Cm = Clu.copy(); Err = err; found = True else: Clu[p] = Cq; size[Cp] += -1; size[Cq] += 1 err, Q, T = P(net,Clu,Types) if err<Err: Cm = Clu.copy(); Err = err; found = True else: Clu[p] = Cp; Clu[q] = Cq return (Cm, Err) def searchBM(Types,nSteps=10,Pbest=np.inf,R=[1,1,1],Q=None,minSize=1, normalize=True,trace=1): print('netsBM:', datetime.now()) start = timer(); n = net._info['nNodes']; nC = Types.shape[0] iD = np.array([net.inDegree(u) for u in net.nodes()],dtype='I') oD = np.array([net.outDegree(u) for u in net.nodes()],dtype='I') ind = np.logical_and(iD<2,oD<2); nSpec = np.sum(ind); spec = np.any(ind) if Q is None: Q = [1]*nC r = np.array(R)/sum(R); q = np.array(Q)/sum(Q) found = False; bad = 0 for k in range(nSteps): j = np.random.choice([0,1,2],p=r) if j==0: C = np.random.choice(range(nC),size=(n)) elif j==1: C = np.random.choice(range(nC),p=q,size=(n)) elif j==2: C = spec+np.random.choice(range(nC-spec),size=(n)) if spec: C[ind] = 0 R = optimize(net,C,Types,minSize); Er = R[1] if trace>1: print(k+1,j,Er) if Er == np.inf: bad += 1 elif Er <= Pbest: if Er < Pbest: Pbest = Er; Cbest = []; nBest = 0 found = True; nBest += 1; newC = True CC = normPar(R[0]) if normalize else R[0] for CB in Cbest: if np.all(CC==CB): newC = False; break if newC: Cbest.append(CC) if trace>0: if n > 50: print(k+1,j,Er) else: print(k+1,j,Er,"\n C :",C,"\n C*:",CC) end = timer(); print("time:",round(end-start,2),"s") if bad>0: print(bad,"bad initial partitions") if found: return (Pbest,nBest,Cbest) # net = N.loadPajek(ddir+"/kite.net"); net.Info() net = N.loadPajek(ddir+"/class.net"); net.Info() # net = N.loadPajek(ddir+"/USAir97.net"); net.Info() XXX = 0; NUL = 1; COM = 2; REG = 3; RRE = 4; CRE = 5; DEN = 6 # n = net._info['nNodes']; nC = 6 n = net._info['nNodes']; nC = 3; dProp=0.5 Types = np.array([{NUL,COM}]*(nC*nC),dtype='O').reshape(nC,nC) Q = [3,3,9] # Rez = searchBM(Types,100,Q=Q) # Pbest = Rez[0]; nBest = Rez[1] # Rez = searchBM(Types,100,Pbest,Q=Q) # Types = np.array([{NUL,COM,REG}]*(nC*nC),dtype='O').reshape(nC,nC) # Reg = searchBM(Types,100,Q=Q,minSize=2) # Prbest = Reg[1]; Regbest = deepcopy(Reg) # Reg = searchBM(500,Prbest)
>>> ========== RESTART: C:\Users\batagelj\work\projects\BM\py\netsBM.py ========== NODES: n = 15 mode = 1 ARCS: multirel = False EDGES: multirel = False network: test Test simple= False directed= False org= 1 mode= 1 multirel= False temporal= False nodes= 15 links= 43 arcs= 30 edges= 13 >>> Rez = searchBM(Types,30,Q=Q) netsBM: 2020-08-16 00:04:08.200403 1 1 28.0 C : [2 2 2 2 2 2 2 2 2 2 0 1 2 1 2] C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0] 13 1 28.0 C : [2 0 1 1 2 2 2 2 2 0 2 2 2 2 2] C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0] 15 2 28.0 C : [2 0 2 2 1 1 0 1 2 2 2 1 2 2 2] C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0] 22 1 28.0 C : [0 2 0 2 2 2 0 2 2 0 2 1 0 2 0] C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0] 25 0 28.0 C : [2 1 0 1 0 2 1 2 2 2 2 0 2 2 0] C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0] time: 5.1 s 2 bad initial partitions >>> Rez (28.0, 5, [array([0, 0, 1, 2, 0, 2, 1, 0, 1, 1, 0, 2, 0, 0, 0])]) >>> Types = np.array([{NUL,COM,REG}]*(nC*nC),dtype='O').reshape(nC,nC) >>> Reg = searchBM(Types,30,Q=Q,minSize=2) netsBM: 2020-08-16 00:10:59.370921 1 0 15.0 C : [1 2 0 1 1 1 0 1 2 2 0 2 1 1 0] C*: [0 1 2 2 2 2 2 2 2 2 0 2 2 1 2] 2 2 15.0 C : [2 2 2 1 0 2 0 2 0 0 1 2 1 2 2] C*: [0 1 1 1 1 1 1 1 1 1 2 2 1 0 1] 3 0 14.0 C : [2 0 0 1 1 2 2 1 2 1 0 2 1 0 2] C*: [0 0 1 1 0 1 1 1 1 1 2 0 0 2 0] 22 2 11.0 C : [1 1 0 1 1 0 2 2 2 1 2 0 0 1 2] C*: [0 1 2 2 1 2 2 2 2 2 0 1 1 0 1] time: 4.4 s 3 bad initial partitions >>> Reg (11.0, 1, [array([0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 0, 1, 1, 0, 1])]) >>> dProp=0.5 >>> denEq = np.array([{NUL,DEN}]*(nC*nC),dtype='O').reshape(nC,nC) >>> den = searchBM(denEq,30,Q=Q,R=[1,1,0],minSize=2) netsBM: 2020-08-16 22:09:27.742218 1 0 11.0 C : [0 2 1 1 0 1 0 2 0 0 1 2 2 2 0] C*: [0 0 1 2 1 2 1 1 1 1 2 2 1 0 1] 5 0 8.0 C : [2 2 1 2 1 1 1 0 1 1 1 0 0 2 2] C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0] 8 0 8.0 C : [0 1 2 1 2 1 0 2 1 1 1 2 1 0 0] C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0] 19 1 8.0 C : [2 1 0 1 1 0 2 1 0 2 2 1 0 2 2] C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0] 25 0 8.0 C : [0 1 1 1 0 1 0 1 0 1 0 1 2 2 2] C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0] time: 3.57 s 5 bad initial partitions >>> savePajekPar(den[2][0],"ClassDen3.clu") >>> P(net,den[2][0],denEq) (8.0, array([[1., 0., 0.], [0., 0., 1.], [2., 4., 0.]]), array([[1, 6, 1], [1, 6, 1], [1, 1, 6]]))
Density dProp = 0.5
>>> net = N.loadPajek(ddir+"/dolphins.net"); net.Info() NODES: n = 62 mode = 1 EDGES: multirel = False network: test Test simple= False directed= False org= 1 mode= 1 multirel= False temporal= False nodes= 62 links= 159 arcs= 0 edges= 159 >>> n = net._info['nNodes']; nC = 8 >>> Q = [1,1,1,1,2,2,2,20] >>> Types = np.array([{NUL,COM}]*(nC*nC),dtype='O').reshape(nC,nC) >>> Rez = searchBM(Types,100,Q=Q) netsBM: 2020-08-16 00:38:31.738431 1 0 318.0 2 2 318.0 3 2 318.0 4 2 318.0 5 2 318.0 6 2 292.0 19 1 272.0 35 0 270.0 49 1 242.0 time: 252.46 s 13 bad initial partitions >>> Q = [1,1,1,1,1,1,1,10] >>> Res = searchBM(Types,100,R=[0,1,0],Q=Q) netsBM: 2020-08-16 01:08:50.263445 1 1 318.0 3 1 316.0 5 1 298.0 8 1 290.0 9 1 266.0 29 1 254.0 66 1 236.0 96 1 228.0 time: 311.44 s 16 bad initial partitions >>> Res (228.0, 1, [array([0, 1, 0, 2, 1, 3, 3, 4, 5, 3, 0, 5, 2, 3, 6, 2, 6, 3, 7, 4, 2, 7, 2, 2, 7, 2, 1, 2, 2, 7, 4, 1, 5, 6, 6, 2, 1, 6, 6, 2, 2, 2, 0, 6, 2, 7, 2, 0, 2, 2, 2, 7, 2, 2, 4, 2, 2, 3, 2, 2, 2, 2])]) >>> savePajekPar(Res[2][0],"dolphinsStr8.clu")
>>> net = N.loadPajek(ddir+"/USAir97.net"); net.Info() NODES: n = 332 mode = 1 ARCS: multirel = False EDGES: multirel = False network: test Test simple= False directed= False org= 1 mode= 1 multirel= False temporal= False nodes= 332 links= 2126 arcs= 0 edges= 2126 >>> nC=8; n = net._info['nNodes'] >>> Types = np.array([{NUL,COM}]*(nC*nC),dtype='O').reshape(nC,nC) >>> Q = [1,1,1,1,1,1,1,70] >>> Res = searchBM(Types,20,R=[0,1,1],Q=Q) netsBM: 2020-08-16 02:08:26.234979 1 2 4252.0 2 1 3238.0 3 1 3162.0 4 1 3154.0 7 1 2994.0 time: 6121.67 s 1 bad initial partitions >>> BM = P(net,Res[2][0],Types) >>> BM (2994.0, array([[648., 9., 16., 3., 304., 478., 11., 1.], [ 9., 0., 0., 0., 2., 4., 0., 0.], [ 16., 0., 0., 0., 5., 12., 0., 0.], [ 3., 0., 0., 0., 1., 1., 0., 0.], [304., 2., 5., 1., 240., 173., 4., 4.], [478., 4., 12., 1., 173., 14., 9., 9.], [ 11., 0., 0., 0., 4., 9., 0., 0.], [ 1., 0., 0., 0., 4., 9., 0., 0.]]), array([[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 2, 1, 1], [1, 1, 1, 1, 2, 2, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]])) >>> savePajekPar(Res[2][0],"USairStr8.clu") >>> net = N.loadPajek(ddir+"/USAir97.net"); net.Info() >>> n = net._info['nNodes']; nC = 8; dProp=0.5 >>> Q = [1,1,1,1,1,1,1,30] >>> denEq = np.array([{NUL,DEN}]*(nC*nC),dtype='O').reshape(nC,nC) >>> USden = searchBM(denEq,10,Q=Q,R=[0,1,1],minSize=3) netsBM: 2020-08-16 23:36:55.260359 1 1 4252.0 2 1 2083.0 4 1 2067.0 7 1 2055.0 9 1 2051.0 time: 2775.3 s >>> savePajekPar(USden[2][0],"USairDen8.clu")
Structural, 8 clusters: usairstr8.pdf
Density pProp=0.5
, 8 clusters: usairden8.pdf
if err<Err:
in function optimize with if 0 < Delta:
where the change of criterion function value Delta = Err - err = P(net,Cm,Types) - P(net,Clu,Types)
is “formally evaluated” (a very tricky task - many terms are canceled);