====== Hypernets clustering ====== Based on [[http://vladowiki.fmf.uni-lj.si/doku.php?id=vlado:work:sda:clu|SDA clustering]] ===== Procedures ===== Adapted from **''symclus.R''** dMembers <- function(Y,p,q){ P <- Y[[p]]$R; Q <- Y[[q]]$R; pp <- Y[[p]]$p; pq <- Y[[q]]$p R <- P+Q; M <- max(R); t <- as.integer(2*R >= M) ppq <- sum((1-t)*R + t*(M-R)) return(ppq - pp - pq) } pMembers <- function(Y,p,q){ P <- Y[[p]]$R; Q <- Y[[q]]$R; pp <- Y[[p]]$p; pq <- Y[[q]]$p R <- P+Q; M <- max(R); t <- as.integer(2*R >= M) return(sum((1-t)*R + t*(M-R))) } Lupdate <- function(Y,ip,iq){ pp <- Y[[ip]]$p; pq <- Y[[iq]]$p P <- Y[[ip]]$R; Q <- Y[[iq]]$R R <- P+Q; M <- max(R); t <- as.integer(2*R >= M) s <- Y[[ip]]$s + Y[[iq]]$s ppq <- sum((1-t)*R + t*(M-R)) return(list(L=t,R=R,f=Y[[ip]]$f+Y[[iq]]$f,s=Y[[ip]]$s+Y[[iq]]$s,p=ppq)) } hyper.cluster <- function(HN,dist=pMembers,dtype="dSets",norm=FALSE,w=NA){ orDendro <- function(i){if(i<0) return(-i) return(c(orDendro(m[i,1]),orDendro(m[i,2])))} nUnits <- nrow(HN$links); nmUnits <- nUnits-1; method <- "HiSets" npUnits <- nUnits+1; n2mUnits <- nUnits+nmUnits; nNodes <- nrow(HN$nodes) H <- HN$links$E; U <- vector("list",n2mUnits) names(U)[1:nUnits] <- HN$links$ID if(is.na(w[1])) w <- rep(1,nUnits) else method <- "HiSetsW" if(norm) method <- paste(method,"N",sep="") for(j in 1:nUnits) {v <- rep(0,nNodes); v[H[[j]]] <- 1 k <- max(1,sum(v)); u <- if(norm) v*(w[j]/k) else v*w[j] U[[j]] <- list(L=v,R=u,f=1,s=w[j],p=0)} D <- matrix(nrow=nUnits,ncol=nUnits) for(p in 1:nmUnits) for(q in (p+1):nUnits) D[q,p] <- D[p,q] <- dist(U,p,q) diag(D) <- Inf active <- 1:nUnits; m <- matrix(nrow=nmUnits,ncol=2) node <- rep(0,nUnits); h <- numeric(nmUnits) for(j in npUnits:n2mUnits) U[[j]] <- list(L=NA,R=NA,f=1,s=1,p=0) names(U)[npUnits:n2mUnits] <- paste("L",1:nmUnits,sep="") for(k in 1:nmUnits){ ind <- active[sapply(active,function(i) which.min(D[i,active]))] dd <- sapply(active,function(i) min(D[i,active])) pq <- which.min(dd) p<-active[pq]; q <- ind[pq]; h[k] <- D[p,q] if(node[p]==0){m[k,1] <- -p; ip <- p} else {m[k,1] <- node[p]; ip <- node[p]} if(node[q]==0){m[k,2] <- -q; iq <- q} else {m[k,2] <- node[q]; iq <- node[q]} ik <- nUnits + k U[[ik]] <- Lupdate(U,ip,iq) active <- setdiff(active,p) for(s in setdiff(active,q)){ is <- ifelse(node[s]==0,s,node[s]) D[s,q] <- D[q,s] <- dist(U,ik,is) } node[[q]] <- ik } for(i in 1:nmUnits) for(j in 1:2) if(m[i,j]>nUnits) m[i,j] <- m[i,j]-nUnits hc <- list(merge=m,height=h,order=orDendro(nmUnits),labels=HN$links$ID, method=method,call=NULL,dist.method=dtype,leaders=U[npUnits:n2mUnits], attrs=HN$nodes$ID) class(hc) <- "hclust" return(hc) } cluster <- function(hc,q) { clu <- function(m,q) return(if(q < 0) -q else union(clu(m,m[q,1]),clu(m,m[q,2]))) return(clu(hc$merge,q)) } ===== Running ===== ==== Network tools and formats ==== > library(jsonlite) > wdir <- "C:/Users/vlado/docs/papers/2023/SDA/Paris/test" > setwd(wdir) > SD <- fromJSON("NetFormats.json") > str(SD) > m <- SD$head$nUnits > V <- data.frame(ID=SD$head$vars$V1$cats); n <- nrow(V) > info <- list(network=SD$info$dataset, + title=SD$info$title, by=SD$info$by, href=SD$info$href, + creator="V. Batagelj", date=date(), + nNodes=n, nLinks=m, simple=NA ) > H <- data.frame(ID=trimws(rownames(SD$SDF)),E=rep(NA,m)) > E = vector(mode="list",m) > for(i in 1:m) E[[i]] <- which(SD$SDF$Tools[[i]]==1) > H$E <- E > HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list()) > str(HN) > wdir <- "C:/Users/vlado/DL/data/hyper" > setwd(wdir) > write(toJSON(HN),"NetFormats.json") > source("hyperNets.R") > hc <- hyper.cluster(HN,pMembers) > plot(hc,hang=-1) ==== Zoo ==== > wdir <- "C:/Users/vlado/DL/data/hyper" > setwd(wdir) > source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R") > library(jsonlite) > wdir <- "C:/Users/vlado/DL/data/hyper/All/AllSet_all_raw_data/zoo" > setwd(wdir) > HN <- fromJSON("Zoo.json") > hc <- hyper.cluster(HN,dMembers) > plot(hc,hang=-1) > hc <- hyper.cluster(HN,pMembers) > plot(hc,hang=-1) ==== FB organizations ==== Shortening some country names > library(jsonlite) > wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs" > setwd(wdir) > HN <- fromJSON("FB_orgsInc.json") > HN$links$ID <- gsub("Island","Is",gsub("Islands","Iss",HN$links$ID)) > HN$links$ID <- gsub("Republic","R",gsub("SAR","/",HN$links$ID)) > HN$links$ID[[45]] <- "Cocos Iss"; HN$links$ID[[138]] <- "Micronesia" > HN$links$ID[[33]] <- "Myanmar" > write(toJSON(HN),"FB_inOrgs.json") > str(HN,list.len=6) Clustering > source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R") > library(jsonlite) > # wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs" > # setwd(wdir) > # HN <- fromJSON("FB_inOrgs.json") > HN <- fromJSON("https://raw.githubusercontent.com/bavla/hypernets/main/data/FB_inOrgs.json") > hc <- hyper.cluster(HN,dMembers) > plot(hc,hang=-1,cex=0.4) > hc <- hyper.cluster(HN,pMembers) > plot(hc,hang=-1,cex=0.4) > hc <- hyper.cluster(HN,pMembers,dtype="pSets",norm=TRUE,w=HN$links$pop/1000000) > plot(hc,hang=-1,cex=0.4) ===== Analysis of clusters ===== > cluster <- function(hc,q) { + clu <- function(m,q) return(if(q < 0) -q else union(clu(m,m[q,1]),clu(m,m[q,2]))) + return(clu(hc$merge,q)) + } > cluster(hc,10) [1] 70 88 103 108 156 176 > top <- function(V,k) return(order(V,decreasing=TRUE)[1:k]) > desc <- function(hc,cl,k){ + km <- min(k,sum(hc$leaders[[cl]]$L)) + I <- top(hc$leaders[[cl]]$R,km) + T <- hc$leaders[[cl]]$R[I]; names(T) <- hc$attrs[I] + return(T) + } > C <- cluster(hc,183) > HN$links$ID[C] [1] "Kazakhstan" "Kyrgyzstan" "Tajikistan" "Armenia" "Azerbaijan" "Turkmenistan" [7] "Uzbekistan" > desc(hc,183,20) ADB CIS EAPC EBRD FAO IBRD ICAO ICRM IDA IFC IFRCS 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 ILO IMF Interpol IOC ISO ITU MIGA NAM OPCW 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 Converting leader vectors into sets > S <- vector("list",nmUnits) > for(j in 1:nmUnits) S[[j]] <- which(hc$leaders[[j]]$L==1) > hc$leaders[[5]]$L [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [48] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 [95] 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 [142] 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 [189] 0 0 1 0 1 1 0 1 0 > S[[5]] [1] 33 68 89 90 91 92 99 107 108 111 114 140 142 144 163 167 169 174 186 191 193 194 196