====== Leaders method for TQs ======
The leaders method for TQs is an adaptation of the leaders method for [[https://arxiv.org/abs/1507.06683|modal symbolic data]] for [[https://link.springer.com/article/10.1007/s13278-016-0330-4|temporal quantities]].
Slides from [[https://github.com/bavla/TQ/blob/master/docs/clusTQsunbelt.pdf|Sunbelt 2020]].
===== Clustering large set using the leaders method =====
To cluster all 13332 words (nodes) in {{https://github.com/bavla/TQ/blob/master/json/Terror.zip|Terror news}} I used an adapted leaders method searching for 100 clusters:
wdir = "C:/Users/batagelj/work/Python/graph/Nets/clusTQ"
gdir = 'c:/users/batagelj/work/python/graph/Nets'
import sys, os, re, datetime, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from TQ import *
from Nets import Network as N
from numpy import random
import numpy as np
from copy import copy, deepcopy
import collections
def table(arr): return collections.Counter(arr)
def timer(): return datetime.datetime.now().ctime()
def unitTQ(nVar): return [ 'unit', [[[], 0]] * nVar ]
# unit = ['unit',[[[],0],[[],0]]]
infinity = float('inf')
# computes weighted squared Euclidean dissimilarity between TQs
def distTQ(X,T,nVar,alpha):
global infinity
d = 0
for i in range(nVar):
nX = X[i][1]; nT = T[i][1]
if nT<=0: return infinity
s = TQ.sum(TQ.prodConst(X[i][0],1/max(1,nX)),
TQ.prodConst(T[i][0],-1/max(1,nT)))
d = alpha[i]*nX*TQ.total(TQ.prod(s,s))
# if is.na(d): return(infinity)
return d
# adapted leaders method
#---------------------------------------------
# VB, 16. julij 2010
# dodaj omejitev na najmanjše število enot v skupini omejeni polmer
# VB, 1. February 2013: added: time, initial clustering
# Python version: VB, 26. June 2020
#
def leaderTQ(TQs,maxL,nVar,alpha,clust=None,trace=2,tim=1,empty=0):
global unit
numTQ = len(TQs)
L = [ deepcopy(unit) for i in range(maxL)]
Ro = np.zeros(maxL); K = np.zeros(maxL,dtype=int)
# if not given, random partition into maxL clusters
if clust is None:
clust = np.random.choice(np.array(range(1,maxL-empty+1)),size=numTQ)
step = 0; print("LeaderTQ:",timer(),"\n\n")
while tim > 0:
step = step+1; K.fill(0)
# new leaders - determine the leaders of clusters in current partition
for j in range(maxL): L[j][0] = "L"+str(j+1)
for i in range(numTQ):
j = clust[i]-1
for k in range(nVar):
L[j][1][k][0] = TQ.sum(L[j][1][k][0],TQs[i][1][k][0])
L[j][1][k][1] = L[j][1][k][1]+TQs[i][1][k][1]
# new partition - assign each unit to the nearest new leader
clust.fill(0)
R = np.zeros(maxL); p = np.zeros(maxL); d = np.zeros(maxL)
for i in range(numTQ):
for k in range(maxL): d[k] = distTQ(TQs[i][1],L[k][1],nVar,alpha)
j = np.argmin(d); r = d[j]
if r == infinity:
print("Infinite unit=",i,"\n"); print(TQs[i])
clust[i] = j+1; p[j] = p[j] + r
if R[j]0: print("Step",step,timer())
delta = [ R[i]-Ro[i] for i in range(maxL) ]
if trace>1:
print(table(clust)); print(R); print(delta); print(p)
if trace>0: print("P =",sum(p))
if sum([abs(a) for a in delta])<0.0000001: break
Ro = R; tim = tim-1
if tim<1:
print(table(clust)); print(R); print(delta); print(p); print("P =",sum(p))
tim = int(input("Times repeat = :\n")); print(f'You entered {tim}')
# TO DO: in the case of empty clusters use the most distant TQs as seeds
return { 'proc':"leaderTQ", 'clust':clust.tolist(), 'leaders':L,
'R':R.tolist(), 'p':p.tolist() }
G = N.loadNetsJSON("C:/Users/batagelj/work/Python/graph/JSON/terror/terror.json")
G.Info()
nVar = 1; alpha = [1]; unit = unitTQ(nVar)
Ter = [[G._nodes[u][3]['lab'], [[G.TQnetSum(u), TQ.total(G.TQnetSum(u))]]]\
for u in G.nodes() ]
Rez = leaderTQ(Ter,100,nVar,alpha,trace=1,tim=5)
js = open("Terror100.json",'w'); json.dump(Rez, js, indent=1); js.close()
Tot = [ (t[0],t[1][0][1]) for t in Ter ]
js = open("Totals.json",'w'); json.dump(Tot, js, indent=1); js.close()
An iteration takes around 7 minutes. I stoped after 50 iterations.
>>>
= RESTART: C:/Users/batagelj/work/Python/graph/Nets/clusTQ/startLeadersTQ.py =
network: Terror
Terror news
simple= False directed= False org= 1 mode= 1 multirel= False temporal= True
nodes= 13332 links= 243447 arcs= 0 edges= 243447
Tmin= 1 Tmax= 67
>>> Rez = leaderTQ(Ter,100,nVar,alpha,trace=1,tim=5)
LeaderTQ: Fri Jun 26 16:17:11 2020
Step 1 Fri Jun 26 16:24:03 2020 P = 41012.94437438724
Step 2 Fri Jun 26 16:31:00 2020 P = 34816.987910189026
Step 3 Fri Jun 26 16:38:02 2020 P = 32237.7188229329
Step 4 Fri Jun 26 16:45:00 2020 P = 30828.419469529275
Step 5 Fri Jun 26 16:51:42 2020 P = 29892.127425480423
Step 6 Fri Jun 26 16:59:51 2020 P = 29194.59947427378
Step 7 Fri Jun 26 17:07:38 2020 P = 28641.034731738484
Step 8 Fri Jun 26 17:15:05 2020 P = 28192.488239481285
Step 9 Fri Jun 26 17:22:39 2020 P = 27821.84173801013
Step 10 Fri Jun 26 17:30:10 2020 P = 27504.183547832534
Step 11 Fri Jun 26 17:42:01 2020 P = 27228.52518355242
Step 12 Fri Jun 26 17:49:49 2020 P = 26981.33179788163
Step 13 Fri Jun 26 17:57:05 2020 P = 26762.369836208803
Step 14 Fri Jun 26 18:04:19 2020 P = 26568.593619232168
Step 15 Fri Jun 26 18:11:31 2020 P = 26395.62563760305
Step 16 Fri Jun 26 18:28:22 2020 P = 26236.897995694795
Step 17 Fri Jun 26 18:34:46 2020 P = 26088.981052957482
Step 18 Fri Jun 26 18:41:47 2020 P = 25951.57575586071
Step 19 Fri Jun 26 18:48:18 2020 P = 25821.565883472253
Step 20 Fri Jun 26 18:56:15 2020 P = 25700.920951763186
Step 21 Fri Jun 26 19:32:54 2020 P = 25588.025305462783
Step 22 Fri Jun 26 19:40:06 2020 P = 25482.561165467723
Step 23 Fri Jun 26 19:47:15 2020 P = 25383.475342625457
Step 24 Fri Jun 26 19:54:18 2020 P = 25289.74904593362
Step 25 Fri Jun 26 20:01:28 2020 P = 25200.29701984593
Step 26 Fri Jun 26 20:08:52 2020 P = 25115.85704328531
Step 27 Fri Jun 26 20:15:53 2020 P = 25035.978863889348
Step 28 Fri Jun 26 20:21:26 2020 P = 24960.26125292126
Step 29 Fri Jun 26 20:27:12 2020 P = 24888.40536113275
Step 30 Fri Jun 26 20:33:05 2020 P = 24819.5120831468
Step 31 Fri Jun 26 20:48:18 2020 P = 24753.51883880618
Step 32 Fri Jun 26 20:54:15 2020 P = 24690.59524831267
Step 33 Fri Jun 26 21:00:15 2020 P = 24630.632428710665
Step 34 Fri Jun 26 21:06:20 2020 P = 24573.316475718257
Step 35 Fri Jun 26 21:12:16 2020 P = 24518.476655021597
Step 36 Fri Jun 26 21:18:13 2020 P = 24465.91176940239
Step 37 Fri Jun 26 21:24:21 2020 P = 24415.60237886584
Step 38 Fri Jun 26 21:30:14 2020 P = 24367.21115117285
Step 39 Fri Jun 26 21:36:06 2020 P = 24321.04265894216
Step 40 Fri Jun 26 21:41:53 2020 P = 24276.783664228875
Step 41 Fri Jun 26 21:48:15 2020 P = 24233.947850857297
Step 42 Fri Jun 26 21:54:34 2020 P = 24191.81422517343
Step 43 Fri Jun 26 22:00:42 2020 P = 24150.937895330193
Step 44 Fri Jun 26 22:06:38 2020 P = 24111.37840853657
Step 45 Fri Jun 26 22:12:30 2020 P = 24073.299354247145
Step 46 Fri Jun 26 22:18:13 2020 P = 24036.467443988044
Step 47 Fri Jun 26 22:23:53 2020 P = 24000.781097476924
Step 48 Fri Jun 26 22:29:36 2020 P = 23965.342450356835
Step 49 Fri Jun 26 22:35:20 2020 P = 23930.986409803205
Step 50 Fri Jun 26 22:40:58 2020 P = 23897.74102650508
Counter({74: 716, 43: 535, 82: 378, 2: 372, 9: 338, 69: 325, 96: 307, 46: 307, 100: 291, 26: 275, 62: 257,
13: 241, 85: 238, 81: 237, 34: 233, 98: 229, 27: 228, 29: 222, 19: 203, 22: 199, 10: 196, 88: 195, 37: 192,
12: 191, 30: 186, 54: 183, 72: 180, 66: 177, 14: 175, 33: 172, 25: 162, 23: 162, 92: 156, 71: 155, 93: 153,
87: 152, 51: 151, 24: 150, 16: 148, 58: 146, 59: 146, 47: 145, 63: 143, 61: 141, 4: 139, 45: 132, 89: 130,
8: 128, 17: 126, 56: 114, 50: 105, 77: 102, 42: 101, 1: 96, 55: 96, 97: 95, 53: 91, 94: 89, 5: 87, 70: 83,
90: 81, 44: 76, 3: 75, 60: 74, 40: 73, 52: 64, 15: 63, 84: 56, 20: 51, 57: 45, 64: 44, 67: 44, 65: 43,
21: 39, 31: 37, 11: 37, 39: 36, 79: 35, 38: 35, 36: 34, 76: 32, 91: 32, 99: 32, 80: 31, 73: 31, 68: 31,
7: 29, 6: 29, 35: 29, 28: 28, 49: 27, 86: 26, 78: 23, 83: 18, 41: 18, 32: 18, 95: 15, 18: 14, 48: 13, 75: 12})
[ 4.05454376 11.62439994 5.35090521 10.21779738 5.38488086 4.88804169
6.45439318 4.47221373 6.82371194 3.86190616 6.63030857 10.48803867
8.50043207 12.85720235 6.88622577 9.64618704 5.53821957 3.42654986
14.24940428 8.34854049 5.94249298 4.1781549 8.24872225 6.83374884
5.16954444 8.70064723 6.59041566 16.58978487 5.74940007 6.40942257
5.56578619 4.98598796 5.50435068 12.1487182 4.98507279 15.17351806
8.5270313 7.25867952 7.46223888 8.54987284 4.29491669 6.82907384
6.32639067 8.72948019 34.68741951 10.18998296 4.66936744 3.95455431
7.28913629 5.29245958 6.03686358 8.35563046 7.32016579 8.85448954
7.31476439 8.50948135 4.54601622 5.36978028 6.10221564 10.6051472
7.52906457 12.99379712 5.5123456 5.8265611 28.44130374 4.28100739
8.9826142 6.71189291 4.08105717 7.96997566 14.47667431 3.35765732
5.58266745 5.16368754 5.84057527 5.16339998 11.07494086 5.71103674
5.75381237 6.34762824 6.48023627 17.33144425 3.02270416 5.11445702
8.99156091 4.34078924 6.18525871 13.81718416 5.29489383 6.22697094
6.5847424 7.09876027 12.36251002 4.8312286 7.8537402 6.00655082
6.36702895 14.67459373 6.44346647 9.41246575]
[-0.025928334494264682, 0.1144546869871288, -0.0013472481771064082, -0.031927777482763986,
-0.004313016897881106, -0.00699245673647475, -0.0015729675097126972, 0.002791108727578262,
-0.015176309951074884, 0.02612313950531675, -0.005059892097893481, -0.018538779587036203,
-0.04524243224014768, 0.008081271659635902, -0.02865593821978507, 0.03928248087084896,
0.0009489417535606393, -0.0012566040340504792, 0.032870339083677536, -0.006793812450089476,
-0.006364208717405617, 0.0009074757369518238, 0.0256754633622851, 0.008267497990432027,
-0.021718770361842665, -0.0062467886607109335, -0.016722265790322766, -0.013430319719322625,
0.007584756964511996, -0.014342401232742219, 0.00143150338578657, -0.00046478858316501004,
0.010880087870412503, 0.001978338664413215, -0.001197435634706423, -0.07678613834789694,
-0.0005183982789738195, -0.17664520115540938, -0.0033090603093270943, -0.003914506240528581,
-0.0024963667913526777, -0.004867813612547955, -0.003569978857913547, -0.0023515446784792005,
0.09853319878786237, -0.015712379286672018, -0.004952953796402149, -0.01716893317577206,
0.00011531243961737658, 0.01859581043121228, -0.03594316395810537, -0.020929434316045104,
0.029233701526942824, -0.0004918877741264538, 0.014664958394515537, 0.04279175364574073,
-0.01421079569809347, -0.007050769142508351, -0.00024851183018359535, -0.003361476597673274,
-0.01181351026306654, 0.062419526846387186, 0.002740524162542002, -0.0008589121273026024,
-0.18451054251177013, 0.016661863674276134, -0.029763920014746148, -0.010861542748953568,
0.017336790725751783, -0.003030324323872513, 0.02388870113427366, -0.0023898562698021486,
-0.008281013672157833, 0.001204656988305075, -0.0035249214065657952, -0.005028994067608927,
-0.009143598388741125, 0.0001842641666431888, -0.003246097173422058, -0.006093907263841736,
0.028720021825460407, 0.01906446383464555, -0.009965711589375648, 0.0012965051648876624,
0.05214469907609676, -0.0029059085015017416, -0.0002010797496776462, -0.0669201499911729,
-0.006648472563363761, -0.0008849414295104552, -0.007582636995611125, -0.050714795904602816,
0.02322280274769284, 0.009402623811269883, -0.0042092851501269735, -0.03256114232137097,
-0.03782291329565357, 0.011091031786282457, -0.007262363740834665, 0.05582725861848381]
[138.28608428 423.02266993 233.55764966 320.07686232 213.95084233
78.82766239 91.04522391 145.43240218 377.9023316 209.89528724
118.97389699 435.4533143 491.59888304 357.89294698 188.47771676
191.65846628 229.64649407 34.00702841 512.27160155 139.4013752
94.61029863 223.29604307 275.04798873 201.44927137 275.12977501
512.9267657 435.17907035 98.28680013 246.06871613 290.74591877
98.72285522 48.19460558 175.27298408 688.2778248 80.3979748
104.76666333 594.72455405 86.36484467 113.81693121 191.61051743
45.56410756 259.00567566 619.47869708 207.99498474 316.21555459
543.28895417 178.53457517 32.91495765 79.91451842 248.03106489
215.47006128 206.0436381 198.8648244 419.11799502 256.74663038
237.16272076 130.52070509 306.52774164 249.64976942 256.20193799
208.88057602 594.4279203 206.29752855 134.87142488 147.09432065
202.68372227 131.36981843 86.33949165 288.52857689 295.41117951
382.58354633 189.09040856 93.62595345 507.10461038 39.7513201
85.34958462 299.1605274 70.2400814 107.83892673 81.99345351
308.70222024 665.46987893 37.33853759 153.12982178 238.10338717
64.73530928 185.93473351 421.76347936 288.03187434 204.24181085
86.2254489 226.43184761 272.55139341 166.03266799 42.05405584
488.25385889 149.63468163 420.37872852 94.72800245 457.83806006]
P = 23897.74102650508
Times repeat = :
0
You entered 0
>>>
===== Hierarchical clustering of leaders =====
[[vlado:work:alg:hctq|Hierarchical clustering of TQs]]
HC = hclusTQ(Rez['leaders'],nVar,alpha)
js = open("TerrorHC.json",'w'); json.dump(HC, js, indent=1); js.close()
> js <- "TerrorHC.json"
> R <- fromJSON(js)
> attr(R,"class") <- "hclust"
> plot(R,hang=-1,cex=0.7)
{{vlado:work:pics:terror100dC.png?800}}
> library(wordcloud)
> js <- "Terror100.json"
> Rez <- fromJSON(js)
> js <- "Totals.json"
> Tot <- fromJSON(js)
> names(R)
[1] "proc" "merge" "height" "order" "labels" "method"
[7] "call" "dist.method" "leaders"
> names(Rez)
[1] "proc" "clust" "leaders" "R" "p"
> unitTQ <- function(unit){
+ total <- unit[[1]][[2]][[1]][[2]]
+ TQ <- unit[[1]][[2]][[1]][[1]]
+ name <- unit[[1]][[1]]
+ TQ[,3] <- TQ[,3]/total
+ return(list(name,TQ))
+ }
> L = R$leaders
> W = Tot[,1]
> F = as.integer(Tot[,2])
> C = Rez$clust
> c95 <- C %in% c(9,69,100)
> sum(c95)
[1] 954
> wordcloud(W[c95],F[c95],scale=c(5,.5),max.words=100)
> L74 <- C %in% c(74)
> sum(L74)
[1] 716
> wordcloud(W[L74],F[L74],scale=c(5,.5),max.words=100)
> L43 <- C %in% c(43)
> sum(L43)
[1] 535
> wordcloud(W[L43],F[L43],scale=c(5,.5),max.words=100)
> L46 <- C %in% c(46)
> sum(L46)
[1] 307
> wordcloud(W[L46],F[L46],scale=c(5,.5),max.words=100)
> C58 <- C %in% c(38,89,36,42,84,90,31,35,40,55,18,48,32,76,11,65,7,49,68,78,39,28,80,
15,21,91,67,73,6,86,79,41,83,75,95)
> sum(C58)
[1] 1396
> wordcloud(W[C58],F[C58],scale=c(5,.5),max.words=100)
> C81 <- C %in% c(72,87,92,62,13,58,45,1,4,5,50,53,44,52,3,20,77,64,99)
> sum(C81)
[1] 2226
> wordcloud(W[C81],F[C81],scale=c(5,.5),max.words=100)
> C88 <- C %in% c(33,10,66,29,26,23,93,30,14,16,47,51,61,8,88,24,12,63,27,54,25,34,56,
59,17,60,57,70,37,94,97,98)
> sum(C88)
[1] 5109
> wordcloud(W[C88],F[C88],scale=c(5,.5),max.words=100)
> C46 <- C %in% c(19,71)
> sum(C46)
[1] 358
> wordcloud(W[C46],F[C46],scale=c(5,.5),max.words=100)
WCL74, WCL43
{{vlado:work:pics:wcl74.png?400}} {{vlado:work:pics:wcl43.png?400}}
WCL46, WCC46
{{vlado:work:pics:wcl46.png?400}} {{vlado:work:pics:wcc46.png?400}}
WCC58, WCC81
{{vlado:work:pics:wcc58.png?400}} {{vlado:work:pics:wcc81.png?400}}
WCC88, WCC95
{{vlado:work:pics:wcc88.png?400}} {{vlado:work:pics:wcc95.png?400}}
WCC94
{{vlado:work:pics:wcc94.png?400}}
L74 [2,] 2 3 0.589369496
[[https://github.com/bavla/Nets/wiki/hist|Drawing TQs]]
> source("https://raw.githubusercontent.com/bavla/Nets/master/source/hist.R")
> LL <- Rez$leaders; CC <- R$leaders
> coHist(unitTQ(LL[74]),unitTQ(CC[98]),1,66,lab="L74:C98",ylim=c(0,0.06),cex.names=0.5,cex.lab=1.5,xlab=50)
> coHist(unitTQ(CC[58]),unitTQ(CC[81]),1,66,lab="C58:C81",ylim=c(0,0.07),cex.names=0.5,cex.lab=1.5,xlab=50)
> coHist(unitTQ(CC[46]),unitTQ(CC[95]),1,66,lab="C46:C95",ylim=c(0,0.25),cex.names=0.5,cex.lab=1.5,xlab=50)
> siHist(unitTQ(CC[46]),1,66,TRUE,ylim=c(0,0.20),cex.names=0.5,cex.lab=1.5,col="royalblue1",xlab=50)
> coHist(unitTQ(LL[43]),unitTQ(CC[97]),1,66,lab="L43:C97",ylim=c(0,0.20),cex.names=0.5,cex.lab=1.5,xlab=50)
[1,] 1 2 0.439295134
[2,] 2 3 0.098072566
> unitTQ(CC[95])
[3,] 3 4 0.166539651
[4,] 4 5 0.156973537
[5,] 5 6 0.225025874
> coHist(unitTQ(CC[96]),unitTQ(CC[95]),1,66,lab="C96:C95",ylim=c(0,0.06),cex.names=0.5,cex.lab=1.5,xlab=50)
> coHist(unitTQ(CC[88]),unitTQ(CC[94]),1,66,lab="C88:C94",ylim=c(0,0.07),cex.names=0.5,cex.lab=1.5,xlab=50)
> X <- unitTQ(LL[96])
> unitTQ(CC[66])[[2]][1:6,]
[,1] [,2] [,3]
[1,] 1 2 0.022506687
[2,] 2 3 0.050189264
[3,] 3 4 0.039212227
[4,] 4 5 0.196113024
[5,] 5 6 0.291704826
[6,] 6 7 0.034845800
> coHist(unitTQ(LL[96]),unitTQ(CC[66]),1,66,lab="L96:C66",ylim=c(0,0.06),cex.names=0.5,cex.lab=1.5,xlab=50)
L74 [1,] 1 2 0.052054299 [2,] 2 3 0.589369496
L43 [1,] 1 2 0.439295134 [2,] 2 3 0.098072566
{{vlado:work:pics:l74-c98.png?400}} {{vlado:work:pics:l43-c97.png?400}}
C95 [3,] 3 4 0.166539651
[3,] 3 4 0.166539651
[4,] 4 5 0.156973537
[5,] 5 6 0.225025874
{{vlado:work:pics:c96-c95.png?400}} {{vlado:work:pics:c88-c94.png?400}}
L96
[15,] 15 16 0.352360449 ;
C66
[4,] 4 5 0.196113024
[5,] 5 6 0.291704826
{{vlado:work:pics:c58-c81.png?400}} {{vlado:work:pics:l96-c66.png?400}}
{{vlado:work:pics:c46-c95.png?400}} {{vlado:work:pics:c46.png?400}}
===== Using package clusTQ =====
I joined leaders method and hierarchical clustering method in package ''[[https://github.com/bavla/TQ|clusTQ]]''
wdir = "C:/Users/batagelj/work/Python/graph/Nets/clusTQ"
gdir = 'c:/users/batagelj/work/python/graph/Nets'
import sys, os, re, datetime, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from TQ import *
from Nets import Network as N
import clusTQ as cl
G = N.loadNetsJSON("C:/Users/batagelj/work/Python/graph/JSON/terror/terror.json")
G.Info()
nVar = 1; alpha = [1]
Ter = [[G._nodes[u][3]['lab'], [[G.TQnetSum(u), TQ.total(G.TQnetSum(u))]]]\
for u in G.nodes() ]
# Rez = cl.leaderTQ(Ter,100,nVar,alpha,trace=1,tim=5)
# js = open("Terror100.json",'w'); json.dump(Rez, js, indent=1); js.close()
# Tot = [ (t[0],t[1][0][1]) for t in Ter ]
# js = open("Totals.json",'w'); json.dump(Tot, js, indent=1); js.close()
# HC = cl.hclusTQ(Rez['leaders'],nVar,alpha)
# js = open("TerrorHC.json",'w'); json.dump(HC, js, indent=1); js.close()
===== Post analysis of saved data =====
==== Reading saved data in Python ====
wdir = "C:/Users/batagelj/work/Python/graph/Nets/clusTQ"
gdir = 'c:/users/batagelj/work/python/graph/Nets'
import sys, os, re, datetime, json
sys.path = [gdir]+sys.path; os.chdir(wdir)
from TQ import *
from Nets import Network as N
from numpy import random
import numpy as np
from copy import copy, deepcopy
import collections
def table(arr): return collections.Counter(arr)
with open("TerrorHC.json") as json_file: R = json.load(json_file)
with open("Terror100.json") as json_file: Rez = json.load(json_file)
with open("Totals.json") as json_file: Tot = json.load(json_file)
R.keys()
# dict_keys(['proc', 'merge', 'height', 'order', 'labels', 'method', 'call', 'dist.method', 'leaders'])
Rez.keys()
# dict_keys(['proc', 'clust', 'leaders', 'R', 'p'])
G = N.loadNetsJSON("C:/Users/batagelj/work/Python/graph/JSON/terror/terror.json")
G.Info()
==== Reading saved data in R ====
and creating a cut partition into 5 clusters from the hierarchy
wdir <- "C:/Users/batagelj/work/Python/graph/Nets/clusTQ"
library(rjson)
setwd(wdir)
js <- "TerrorHC.json"; R <- fromJSON(file=js); attr(R,"class") <- "hclust"
js <- "Terror100.json"; Rez <- fromJSON(file=js)
js <- "Totals.json"; Tot <- fromJSON(file=js)
R$merge <- matrix(unlist(R$merge),nrow=99,ncol=2,byrow=TRUE)
p <- cutree(R,k=5)
as.vector(p)
[1] 1 1 1 1 1 1 1 2 3 2 1 2 1 2 1 2 2 1 1 1 1 1 2 2 2 2 2 1 2 2 1 1 2 2 1 1 2 1 1 1
[41] 1 1 4 1 1 1 2 1 1 1 2 1 1 2 1 2 2 1 2 2 2 1 2 1 1 2 1 1 3 2 1 1 1 5 1 1 1 1 1 1
[81] 1 1 1 1 1 1 1 2 1 1 1 1 2 2 1 1 2 2 1 3
table(Rez$clust)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
96 372 75 139 87 29 29 128 338 196 37 191 241 175 63 148 126 14 203 51 39
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
199 162 150 162 275 228 28 222 186 37 18 172 233 29 34 192 35 36 73 18 101
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
535 76 132 307 145 13 27 105 151 64 91 183 96 114 45 146 146 74 141 257 143
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
44 43 177 44 31 325 83 155 180 31 716 12 32 102 23 35 31 237 378 18 56
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
238 26 152 195 130 81 32 156 153 89 15 307 95 229 32 291
q <- p[Rez$clust]
table(q)
1 2 3 4 5
6018 5109 954 535 716
c94 <- as.vector((1:100)[p==1])
C94 <- C %in% c94
wordcloud(W[C94],F[C94],scale=c(5,.5),max.words=100)
I manually converted the partition p into Python assignment and computed the corresponding partition of units C
>>> p5 = [ 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1,
1, 1, 4, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2,
2, 1, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 3 ]
>>> q = [ p5[i-1] for i in Rez['clust'] ]
>>> table(q)
Counter({1: 6018, 2: 5109, 3: 954, 5: 716, 4: 535})
>>> C = [[] for r in range(5)]
>>> for i in range(1,6): C[i-1] = [ v for v in G.nodes() if q[v-1]==i ]
>>> Nam = [ 'C94', 'C88', 'C95', 'L43', 'L74' ]
>>> Num = [ len(C[i]) for i in range(5) ]
>>> Num
[6018, 5109, 954, 535, 716]
>>> M = [[[] for r in range(5)] for s in range(5)]
>>> T = [[0 for r in range(5)] for s in range(5)]
>>> for r in range(5):
for s in range(r,5):
M[r][s] = G.TQactivity(C[r],C[s])
>>> for r in range(5): M[r][r] = TQ.prodConst(M[r][r],1/2)
>>> for r in range(5):
for s in range(r,5):
T[r][s] = TQ.total(M[r][s])
>>> T
[[143549, 67801, 5422, 2816, 2939],
[ 0, 18288, 739, 357, 357],
[ 0, 0, 535, 53, 54],
[ 0, 0, 0, 205, 51],
[ 0, 0, 0, 0, 281]]
>>> Q = [[0 for r in range(5)] for s in range(5)]
>>> for r in range(5):
for s in range(r,5):
Q[r][s] = T[r][s]/sqrt(len(C[r])*len(C[s]))
>>> Q
[[23.85, 12.23, 2.26, 1.57, 1.42],
[ 0, 3.58, 0.33, 0.22, 0.19],
[ 0, 0, 0.56, 0.07, 0.07],
[ 0, 0, 0, 0.38, 0.08],
[ 0, 0, 0, 0, 0.39]]
Saving BM in JSON
>>> BM = []
>>> for r in range(5):
for s in range(r,5):
BM.append([ Nam[r]+'-'+Nam[s], [[M[r][s], TQ.total(M[r][s])]]])
>>> js = open("BM.json",'w'); json.dump(BM, js, indent=1); js.close()
Drawing BM TQs in R
> unitTQ <- function(unit){
+ total <- unit[[1]][[2]][[1]][[2]]
+ TQ <- matrix(unlist(unit[[1]][[2]][[1]][[1]]),ncol=3,byrow=TRUE)
+ name <- unit[[1]][[1]]
+ TQ[,3] <- TQ[,3]/total
+ return(list(name,TQ))
+ }
> js <- "BM.json"; BM <- fromJSON(file=js)
> for(b in 1:15){
+ siHist(unitTQ(BM[b]),1,66,TRUE,ylim=c(0,0.07),cex.names=0.5,cex.lab=1.5,col="royalblue1",xlab=50,PDF=TRUE)
+ # ans <- readline(prompt=paste(str(b),". Press enter to continue"))
+ }