====== Hypernets clustering ======
Based on [[http://vladowiki.fmf.uni-lj.si/doku.php?id=vlado:work:sda:clu|SDA clustering]]
===== Procedures =====
Adapted from **''symclus.R''**
dMembers <- function(Y,p,q){
P <- Y[[p]]$R; Q <- Y[[q]]$R; pp <- Y[[p]]$p; pq <- Y[[q]]$p
R <- P+Q; M <- max(R); t <- as.integer(2*R >= M)
ppq <- sum((1-t)*R + t*(M-R))
return(ppq - pp - pq)
}
pMembers <- function(Y,p,q){
P <- Y[[p]]$R; Q <- Y[[q]]$R; pp <- Y[[p]]$p; pq <- Y[[q]]$p
R <- P+Q; M <- max(R); t <- as.integer(2*R >= M)
return(sum((1-t)*R + t*(M-R)))
}
Lupdate <- function(Y,ip,iq){
pp <- Y[[ip]]$p; pq <- Y[[iq]]$p
P <- Y[[ip]]$R; Q <- Y[[iq]]$R
R <- P+Q; M <- max(R); t <- as.integer(2*R >= M)
s <- Y[[ip]]$s + Y[[iq]]$s
ppq <- sum((1-t)*R + t*(M-R))
return(list(L=t,R=R,f=Y[[ip]]$f+Y[[iq]]$f,s=Y[[ip]]$s+Y[[iq]]$s,p=ppq))
}
hyper.cluster <- function(HN,dist=pMembers,dtype="dSets",norm=FALSE,w=NA){
orDendro <- function(i){if(i<0) return(-i)
return(c(orDendro(m[i,1]),orDendro(m[i,2])))}
nUnits <- nrow(HN$links); nmUnits <- nUnits-1; method <- "HiSets"
npUnits <- nUnits+1; n2mUnits <- nUnits+nmUnits; nNodes <- nrow(HN$nodes)
H <- HN$links$E; U <- vector("list",n2mUnits)
names(U)[1:nUnits] <- HN$links$ID
if(is.na(w[1])) w <- rep(1,nUnits) else method <- "HiSetsW"
if(norm) method <- paste(method,"N",sep="")
for(j in 1:nUnits) {v <- rep(0,nNodes); v[H[[j]]] <- 1
k <- max(1,sum(v)); u <- if(norm) v*(w[j]/k) else v*w[j]
U[[j]] <- list(L=v,R=u,f=1,s=w[j],p=0)}
D <- matrix(nrow=nUnits,ncol=nUnits)
for(p in 1:nmUnits) for(q in (p+1):nUnits) D[q,p] <- D[p,q] <- dist(U,p,q)
diag(D) <- Inf
active <- 1:nUnits; m <- matrix(nrow=nmUnits,ncol=2)
node <- rep(0,nUnits); h <- numeric(nmUnits)
for(j in npUnits:n2mUnits) U[[j]] <- list(L=NA,R=NA,f=1,s=1,p=0)
names(U)[npUnits:n2mUnits] <- paste("L",1:nmUnits,sep="")
for(k in 1:nmUnits){
ind <- active[sapply(active,function(i) which.min(D[i,active]))]
dd <- sapply(active,function(i) min(D[i,active]))
pq <- which.min(dd)
p<-active[pq]; q <- ind[pq]; h[k] <- D[p,q]
if(node[p]==0){m[k,1] <- -p; ip <- p} else {m[k,1] <- node[p]; ip <- node[p]}
if(node[q]==0){m[k,2] <- -q; iq <- q} else {m[k,2] <- node[q]; iq <- node[q]}
ik <- nUnits + k
U[[ik]] <- Lupdate(U,ip,iq)
active <- setdiff(active,p)
for(s in setdiff(active,q)){
is <- ifelse(node[s]==0,s,node[s])
D[s,q] <- D[q,s] <- dist(U,ik,is)
}
node[[q]] <- ik
}
for(i in 1:nmUnits) for(j in 1:2) if(m[i,j]>nUnits) m[i,j] <- m[i,j]-nUnits
hc <- list(merge=m,height=h,order=orDendro(nmUnits),labels=HN$links$ID,
method=method,call=NULL,dist.method=dtype,leaders=U[npUnits:n2mUnits],
attrs=HN$nodes$ID)
class(hc) <- "hclust"
return(hc)
}
cluster <- function(hc,q) {
clu <- function(m,q) return(if(q < 0) -q else union(clu(m,m[q,1]),clu(m,m[q,2])))
return(clu(hc$merge,q))
}
===== Running =====
==== Network tools and formats ====
> library(jsonlite)
> wdir <- "C:/Users/vlado/docs/papers/2023/SDA/Paris/test"
> setwd(wdir)
> SD <- fromJSON("NetFormats.json")
> str(SD)
> m <- SD$head$nUnits
> V <- data.frame(ID=SD$head$vars$V1$cats); n <- nrow(V)
> info <- list(network=SD$info$dataset,
+ title=SD$info$title, by=SD$info$by, href=SD$info$href,
+ creator="V. Batagelj", date=date(),
+ nNodes=n, nLinks=m, simple=NA )
> H <- data.frame(ID=trimws(rownames(SD$SDF)),E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- which(SD$SDF$Tools[[i]]==1)
> H$E <- E
> HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list())
> str(HN)
> wdir <- "C:/Users/vlado/DL/data/hyper"
> setwd(wdir)
> write(toJSON(HN),"NetFormats.json")
> source("hyperNets.R")
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1)
==== Zoo ====
> wdir <- "C:/Users/vlado/DL/data/hyper"
> setwd(wdir)
> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> library(jsonlite)
> wdir <- "C:/Users/vlado/DL/data/hyper/All/AllSet_all_raw_data/zoo"
> setwd(wdir)
> HN <- fromJSON("Zoo.json")
> hc <- hyper.cluster(HN,dMembers)
> plot(hc,hang=-1)
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1)
==== FB organizations ====
Shortening some country names
> library(jsonlite)
> wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs"
> setwd(wdir)
> HN <- fromJSON("FB_orgsInc.json")
> HN$links$ID <- gsub("Island","Is",gsub("Islands","Iss",HN$links$ID))
> HN$links$ID <- gsub("Republic","R",gsub("SAR","/",HN$links$ID))
> HN$links$ID[[45]] <- "Cocos Iss"; HN$links$ID[[138]] <- "Micronesia"
> HN$links$ID[[33]] <- "Myanmar"
> write(toJSON(HN),"FB_inOrgs.json")
> str(HN,list.len=6)
Clustering
> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> library(jsonlite)
> # wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs"
> # setwd(wdir)
> # HN <- fromJSON("FB_inOrgs.json")
> HN <- fromJSON("https://raw.githubusercontent.com/bavla/hypernets/main/data/FB_inOrgs.json")
> hc <- hyper.cluster(HN,dMembers)
> plot(hc,hang=-1,cex=0.4)
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1,cex=0.4)
> hc <- hyper.cluster(HN,pMembers,dtype="pSets",norm=TRUE,w=HN$links$pop/1000000)
> plot(hc,hang=-1,cex=0.4)
===== Analysis of clusters =====
> cluster <- function(hc,q) {
+ clu <- function(m,q) return(if(q < 0) -q else union(clu(m,m[q,1]),clu(m,m[q,2])))
+ return(clu(hc$merge,q))
+ }
> cluster(hc,10)
[1] 70 88 103 108 156 176
> top <- function(V,k) return(order(V,decreasing=TRUE)[1:k])
> desc <- function(hc,cl,k){
+ km <- min(k,sum(hc$leaders[[cl]]$L))
+ I <- top(hc$leaders[[cl]]$R,km)
+ T <- hc$leaders[[cl]]$R[I]; names(T) <- hc$attrs[I]
+ return(T)
+ }
> C <- cluster(hc,183)
> HN$links$ID[C]
[1] "Kazakhstan" "Kyrgyzstan" "Tajikistan" "Armenia" "Azerbaijan" "Turkmenistan"
[7] "Uzbekistan"
> desc(hc,183,20)
ADB CIS EAPC EBRD FAO IBRD ICAO ICRM IDA IFC IFRCS
1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874
ILO IMF Interpol IOC ISO ITU MIGA NAM OPCW
1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874
Converting leader vectors into sets
> S <- vector("list",nmUnits)
> for(j in 1:nmUnits) S[[j]] <- which(hc$leaders[[j]]$L==1)
> hc$leaders[[5]]$L
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[48] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0
[95] 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
[142] 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
[189] 0 0 1 0 1 1 0 1 0
> S[[5]]
[1] 33 68 89 90 91 92 99 107 108 111 114 140 142 144 163 167 169 174 186 191 193 194 196