NetsBM - generalized blockmodeling based on Nets

The modul netsBM implements generalized blockmodeling procedure for simple binary (the link weights are not considered) networks based on network analysis package Nets.

To speed-up the traditional procedure the local optimization is limited to the end-nodes of links belonging to different clusters.

Python code, Test data

This is an improvement of the first version of NetsBM:

  • it works on any simple network (not only on directed);
  • the generation of random initial partitions can be controlled using lists R and Q;
  • the minimal partitions are normalized (if normalize==True);
  • different minimal partitions are saved in a list.
wdir = "C:/Users/batagelj/work/projects/BM/py"
gdir = "c:/users/batagelj/work/python/graph/Nets"
ddir = "C:/Users/batagelj/work/projects/BM/data"
 
import sys, os, re, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from Nets import Network as N
import numpy as np
from copy import copy, deepcopy
from timeit import default_timer as timer
from datetime import datetime
 
def savePajekPar(L,file):
    clu = open(file,'w');  n=len(L)
    clu.write('*vertices '+str(n)+'\n')
    for i in range(n): clu.write(str(L[i])+'\n')
    clu.close()
 
def normPar(L):
    I = -np.ones(1+max(L),dtype=int)
    N = np.zeros(len(L),dtype=int); k = -1
    for i,j in enumerate(L):
        if I[j]<0: k += 1; I[j] = k
        N[i] = I[j]
    return N
 
def P(net,C,Types):
    n = net._info['nNodes']; nC = Types.shape[0]; size = np.zeros(nC,dtype=int)
    for i in C: size[i] += 1
    dCnt = np.zeros(nC); Cnt = np.zeros((nC,nC))
  # Sum = np.zeros((nC,nC)); rSum = np.zeros((nC,n)); cSum = np.zeros((nC,n))
    rCnt = np.zeros((nC,n)); cCnt = np.zeros((nC,n))
    rPos = np.zeros((nC,nC)); cPos = np.zeros((nC,nC))
    error = np.zeros((nC,nC)); btype = np.zeros((nC,nC),dtype=int)
    for a in net.links():
        u,v,d,r,W = net.link(a); w = W['w']; p = u-1; q = v-1
        Cnt[C[p],C[q]] += 1; rCnt[C[q],p] += 1; cCnt[C[p],q] += 1
        if not d: Cnt[C[q],C[p]] += 1; rCnt[C[p],q] += 1; cCnt[C[q],p] += 1
        if p==q: dCnt[C[p]] += 1
    rB = (rCnt>0).astype(int); cB = (cCnt>0).astype(int)        
    for i in range(nC):
        for s in range(n):
            rPos[C[s],i] += rB[i,s]; cPos[i,C[s]] += cB[i,s]
    for i in range(nC):
        for j in range(nC):
            err = np.inf; typ = XXX
            if NUL in Types[i,j]: 
                ert = Cnt[i,j]
                if i==j: ert += min(0,size[i]-2*dCnt[i])
                if ert < err: err = ert; typ = NUL
            if COM in Types[i,j]: 
                ert = size[i]*size[j] - Cnt[i,j]
                if i==j: ert += min(0,2*dCnt[i]-size[i])
                if ert < err: err = ert; typ = COM
            if REG in Types[i,j]: 
                ert = (size[j]-cPos[i,j])*size[i] +\
                      (size[i]-rPos[i,j])*cPos[i,j]
                if ert < err: err = ert; typ = REG
            if RRE in Types[i,j]: 
                ert = (size[i]-rPos[i,j])*size[j]
                if ert < err: err = ert; typ = RRE
            if CRE in Types[i,j]: 
                ert = (size[j]-cPos[i,j])*size[i] 
                if ert < err: err = ert; typ = DEN
            if DEN in Types[i,j]: 
                ert = max(int(dProp*size[i]*(size[i]-1))-Cnt[i,j]+dCnt[i],0) if i==j \
                      else max(int(dProp*size[i]*size[j])-Cnt[i,j],0)
            error[i,j] = err; btype[i,j] = typ
    return (np.sum(error), error, btype)
 
def optimize(net,C,Types,minSize):
    nC = Types.shape[0]; size = np.zeros(nC,dtype=int)
    for i in C: size[i] += 1
    if np.min(size)<minSize: return(C,np.inf,None,None)
    found = True; Clu = np.array(C,dtype=int); Cm = Clu.copy()
    Err, Q, T = P(net,Clu,Types)
    while found:
        found = False
        for a in net.links():
            u,v,d,r,W = net.link(a); w = W['w']
            p = u-1; q = v-1; Cp = Clu[p]; Cq = Clu[q]
            if not(Cp==Cq):
                Clu[p] = Cq; size[Cp] += -1; size[Cq] += 1
                if size[Cp]<minSize: err = np.inf
                else: err, Q, T = P(net,Clu,Types)
                if err<Err: Cm = Clu.copy(); Err = err; found = True
                else:
                    Clu[p] = Cp; Clu[q] = Cp; size[Cp] += 2; size[Cq] += -2
                    if size[Cq]<minSize: err = np.inf
                    else: err, Q, T = P(net,Clu,Types)
                    if err<Err: Cm = Clu.copy(); Err = err; found = True
                    else:
                        Clu[p] = Cq; size[Cp] += -1; size[Cq] += 1
                        err, Q, T = P(net,Clu,Types)
                        if err<Err: Cm = Clu.copy(); Err = err; found = True
                        else: Clu[p] = Cp; Clu[q] = Cq 
    return (Cm, Err)
 
def searchBM(Types,nSteps=10,Pbest=np.inf,R=[1,1,1],Q=None,minSize=1,
             normalize=True,trace=1):
    print('netsBM:', datetime.now())
    start = timer(); n = net._info['nNodes']; nC = Types.shape[0]
    iD = np.array([net.inDegree(u) for u in net.nodes()],dtype='I')        
    oD = np.array([net.outDegree(u) for u in net.nodes()],dtype='I')
    ind = np.logical_and(iD<2,oD<2); nSpec = np.sum(ind); spec = np.any(ind)
    if Q is None: Q = [1]*nC
    r = np.array(R)/sum(R); q = np.array(Q)/sum(Q)
    found = False; bad = 0
    for k in range(nSteps):
        j = np.random.choice([0,1,2],p=r)
        if   j==0: C = np.random.choice(range(nC),size=(n))
        elif j==1: C = np.random.choice(range(nC),p=q,size=(n))
        elif j==2:
            C = spec+np.random.choice(range(nC-spec),size=(n))
            if spec: C[ind] = 0
        R = optimize(net,C,Types,minSize); Er = R[1]
        if trace>1: print(k+1,j,Er)
        if Er == np.inf: bad += 1
        elif Er <= Pbest:
            if Er < Pbest: Pbest = Er; Cbest = []; nBest = 0
            found = True; nBest += 1; newC = True
            CC = normPar(R[0]) if normalize else R[0]
            for CB in Cbest:
                if np.all(CC==CB): newC = False; break
            if newC: Cbest.append(CC)
            if trace>0:
                if n > 50: print(k+1,j,Er)
                else: print(k+1,j,Er,"\n  C :",C,"\n  C*:",CC)
    end = timer(); print("time:",round(end-start,2),"s")
    if bad>0: print(bad,"bad initial partitions")
    if found: return (Pbest,nBest,Cbest)
 
# net = N.loadPajek(ddir+"/kite.net"); net.Info()
net = N.loadPajek(ddir+"/class.net"); net.Info()
# net = N.loadPajek(ddir+"/USAir97.net"); net.Info()
 
XXX = 0; NUL = 1; COM = 2; REG = 3; RRE = 4; CRE = 5; DEN = 6
# n = net._info['nNodes']; nC = 6
n = net._info['nNodes']; nC = 3; dProp=0.5
 
Types = np.array([{NUL,COM}]*(nC*nC),dtype='O').reshape(nC,nC)
Q = [3,3,9] 
# Rez = searchBM(Types,100,Q=Q)
# Pbest = Rez[0]; nBest = Rez[1]
# Rez = searchBM(Types,100,Pbest,Q=Q)
# Types = np.array([{NUL,COM,REG}]*(nC*nC),dtype='O').reshape(nC,nC)
# Reg = searchBM(Types,100,Q=Q,minSize=2)
# Prbest = Reg[1]; Regbest = deepcopy(Reg)
# Reg = searchBM(500,Prbest)

Class

>>> 
========== RESTART: C:\Users\batagelj\work\projects\BM\py\netsBM.py ==========
NODES:  n = 15  mode = 1
ARCS:  multirel = False
EDGES:  multirel = False
network:  test 
 Test 
simple= False  directed= False  org= 1  mode= 1  multirel= False  temporal= False 
nodes= 15  links= 43  arcs= 30  edges= 13
>>> Rez = searchBM(Types,30,Q=Q)
netsBM: 2020-08-16 00:04:08.200403
1 1 28.0 
  C : [2 2 2 2 2 2 2 2 2 2 0 1 2 1 2] 
  C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0]
13 1 28.0 
  C : [2 0 1 1 2 2 2 2 2 0 2 2 2 2 2] 
  C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0]
15 2 28.0 
  C : [2 0 2 2 1 1 0 1 2 2 2 1 2 2 2] 
  C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0]
22 1 28.0 
  C : [0 2 0 2 2 2 0 2 2 0 2 1 0 2 0] 
  C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0]
25 0 28.0 
  C : [2 1 0 1 0 2 1 2 2 2 2 0 2 2 0] 
  C*: [0 0 1 2 0 2 1 0 1 1 0 2 0 0 0]
time: 5.1 s
2 bad initial partitions
>>> Rez
(28.0, 5, [array([0, 0, 1, 2, 0, 2, 1, 0, 1, 1, 0, 2, 0, 0, 0])])
>>> Types = np.array([{NUL,COM,REG}]*(nC*nC),dtype='O').reshape(nC,nC)
 
>>> Reg = searchBM(Types,30,Q=Q,minSize=2)
netsBM: 2020-08-16 00:10:59.370921
1 0 15.0 
  C : [1 2 0 1 1 1 0 1 2 2 0 2 1 1 0] 
  C*: [0 1 2 2 2 2 2 2 2 2 0 2 2 1 2]
2 2 15.0 
  C : [2 2 2 1 0 2 0 2 0 0 1 2 1 2 2] 
  C*: [0 1 1 1 1 1 1 1 1 1 2 2 1 0 1]
3 0 14.0 
  C : [2 0 0 1 1 2 2 1 2 1 0 2 1 0 2] 
  C*: [0 0 1 1 0 1 1 1 1 1 2 0 0 2 0]
22 2 11.0 
  C : [1 1 0 1 1 0 2 2 2 1 2 0 0 1 2] 
  C*: [0 1 2 2 1 2 2 2 2 2 0 1 1 0 1]
time: 4.4 s
3 bad initial partitions
>>> Reg
(11.0, 1, [array([0, 1, 2, 2, 1, 2, 2, 2, 2, 2, 0, 1, 1, 0, 1])])
>>> dProp=0.5
>>> denEq = np.array([{NUL,DEN}]*(nC*nC),dtype='O').reshape(nC,nC)
>>> den = searchBM(denEq,30,Q=Q,R=[1,1,0],minSize=2)
netsBM: 2020-08-16 22:09:27.742218
1 0 11.0 
  C : [0 2 1 1 0 1 0 2 0 0 1 2 2 2 0] 
  C*: [0 0 1 2 1 2 1 1 1 1 2 2 1 0 1]
5 0 8.0 
  C : [2 2 1 2 1 1 1 0 1 1 1 0 0 2 2] 
  C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0]
8 0 8.0 
  C : [0 1 2 1 2 1 0 2 1 1 1 2 1 0 0] 
  C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0]
19 1 8.0 
  C : [2 1 0 1 1 0 2 1 0 2 2 1 0 2 2] 
  C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0]
25 0 8.0 
  C : [0 1 1 1 0 1 0 1 0 1 0 1 2 2 2] 
  C*: [0 0 1 2 1 2 1 1 1 1 0 2 1 0 0]
time: 3.57 s
5 bad initial partitions
>>> savePajekPar(den[2][0],"ClassDen3.clu")
>>> P(net,den[2][0],denEq)
(8.0, 
array([[1., 0., 0.],
       [0., 0., 1.],
       [2., 4., 0.]]), 
array([[1, 6, 1],
       [1, 6, 1],
       [1, 1, 6]]))

Density dProp = 0.5

Dolphins

>>> net = N.loadPajek(ddir+"/dolphins.net"); net.Info()
NODES:  n = 62  mode = 1
EDGES:  multirel = False
network:  test 
 Test 
simple= False  directed= False  org= 1  mode= 1  multirel= False  temporal= False 
nodes= 62  links= 159  arcs= 0  edges= 159
>>> n = net._info['nNodes']; nC = 8
>>> Q = [1,1,1,1,2,2,2,20]
>>> Types = np.array([{NUL,COM}]*(nC*nC),dtype='O').reshape(nC,nC)
>>> Rez = searchBM(Types,100,Q=Q)
netsBM: 2020-08-16 00:38:31.738431
1 0 318.0
2 2 318.0
3 2 318.0
4 2 318.0
5 2 318.0
6 2 292.0
19 1 272.0
35 0 270.0
49 1 242.0
time: 252.46 s
13 bad initial partitions
>>> Q = [1,1,1,1,1,1,1,10]
>>> Res = searchBM(Types,100,R=[0,1,0],Q=Q)
netsBM: 2020-08-16 01:08:50.263445
1 1 318.0
3 1 316.0
5 1 298.0
8 1 290.0
9 1 266.0
29 1 254.0
66 1 236.0
96 1 228.0
time: 311.44 s
16 bad initial partitions
>>> Res
(228.0, 1,
[array([0, 1, 0, 2, 1, 3, 3, 4, 5, 3, 0, 5, 2, 3, 6, 2, 6, 3, 7, 4, 2, 7,
        2, 2, 7, 2, 1, 2, 2, 7, 4, 1, 5, 6, 6, 2, 1, 6, 6, 2, 2, 2, 0, 6,
        2, 7, 2, 0, 2, 2, 2, 7, 2, 2, 4, 2, 2, 3, 2, 2, 2, 2])])
>>> savePajekPar(Res[2][0],"dolphinsStr8.clu")

dolphmstr8.pdf

US Airports

>>> net = N.loadPajek(ddir+"/USAir97.net"); net.Info()
NODES:  n = 332  mode = 1
ARCS:  multirel = False
EDGES:  multirel = False
network:  test 
 Test 
simple= False  directed= False  org= 1  mode= 1  multirel= False  temporal= False 
nodes= 332  links= 2126  arcs= 0  edges= 2126
>>> nC=8; n = net._info['nNodes']
>>> Types = np.array([{NUL,COM}]*(nC*nC),dtype='O').reshape(nC,nC)
>>> Q = [1,1,1,1,1,1,1,70]
>>> Res = searchBM(Types,20,R=[0,1,1],Q=Q)
netsBM: 2020-08-16 02:08:26.234979
1 2 4252.0
2 1 3238.0
3 1 3162.0
4 1 3154.0
7 1 2994.0
time: 6121.67 s
1 bad initial partitions
>>> BM = P(net,Res[2][0],Types)
>>> BM
(2994.0,
array([[648.,   9.,  16.,   3., 304., 478.,  11.,   1.],
       [  9.,   0.,   0.,   0.,   2.,   4.,   0.,   0.],
       [ 16.,   0.,   0.,   0.,   5.,  12.,   0.,   0.],
       [  3.,   0.,   0.,   0.,   1.,   1.,   0.,   0.],
       [304.,   2.,   5.,   1., 240., 173.,   4.,   4.],
       [478.,   4.,  12.,   1., 173.,  14.,   9.,   9.],
       [ 11.,   0.,   0.,   0.,   4.,   9.,   0.,   0.],
       [  1.,   0.,   0.,   0.,   4.,   9.,   0.,   0.]]), 
array([[1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 2, 1, 1],
       [1, 1, 1, 1, 2, 2, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1]]))
>>> savePajekPar(Res[2][0],"USairStr8.clu")
>>> net = N.loadPajek(ddir+"/USAir97.net"); net.Info()
>>> n = net._info['nNodes']; nC = 8; dProp=0.5
>>> Q = [1,1,1,1,1,1,1,30]
>>> denEq = np.array([{NUL,DEN}]*(nC*nC),dtype='O').reshape(nC,nC)
>>> USden = searchBM(denEq,10,Q=Q,R=[0,1,1],minSize=3)
netsBM: 2020-08-16 23:36:55.260359
1 1 4252.0
2 1 2083.0
4 1 2067.0
7 1 2055.0
9 1 2051.0
time: 2775.3 s
>>> savePajekPar(USden[2][0],"USairDen8.clu")

Structural, 8 clusters: usairstr8.pdf

Density pProp=0.5, 8 clusters: usairden8.pdf

To do

  1. add penalty weights, constraints and other block types;
  2. compute also permutation determined by the ordering (cluster, component, degree; node);
  3. reprogram the function P using switch/case (1, 2, 3);
  4. would it be better to make tables from the function P global?
  5. replace the tests if err<Err: in function optimize with if 0 < Delta: where the change of criterion function value Delta = Err - err = P(net,Cm,Types) - P(net,Clu,Types) is “formally evaluated” (a very tricky task - many terms are canceled);
  6. program it in Julia. Note that the network can be represented by list of links (arcs/edges);
  7. extend the algorithm to two-mode networks.
vlado/work/gbm.txt · Last modified: 2020/08/18 04:33 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki