Hypernets clustering

Based on SDA clustering

Procedures

Adapted from symclus.R

dMembers <- function(Y,p,q){
  P <- Y[[p]]$R; Q <- Y[[q]]$R; pp <- Y[[p]]$p; pq <- Y[[q]]$p
  R <- P+Q; M <- max(R); t <- as.integer(2*R >= M)
  ppq <- sum((1-t)*R + t*(M-R))
  return(ppq - pp - pq)
}

pMembers <- function(Y,p,q){
  P <- Y[[p]]$R; Q <- Y[[q]]$R; pp <- Y[[p]]$p; pq <- Y[[q]]$p
  R <- P+Q; M <- max(R); t <- as.integer(2*R >= M)
  return(sum((1-t)*R + t*(M-R)))
}

Lupdate <- function(Y,ip,iq){
  pp <- Y[[ip]]$p; pq <- Y[[iq]]$p
  P <- Y[[ip]]$R; Q <- Y[[iq]]$R
  R <- P+Q; M <- max(R); t <- as.integer(2*R >= M)
  s <- Y[[ip]]$s + Y[[iq]]$s
  ppq <- sum((1-t)*R + t*(M-R))
  return(list(L=t,R=R,f=Y[[ip]]$f+Y[[iq]]$f,s=Y[[ip]]$s+Y[[iq]]$s,p=ppq))
}

hyper.cluster <- function(HN,dist=pMembers,dtype="dSets",norm=FALSE,w=NA){
  orDendro <- function(i){if(i<0) return(-i)
    return(c(orDendro(m[i,1]),orDendro(m[i,2])))}

  nUnits <- nrow(HN$links); nmUnits <- nUnits-1; method <- "HiSets"
  npUnits <- nUnits+1; n2mUnits <- nUnits+nmUnits; nNodes <- nrow(HN$nodes)
  H <- HN$links$E; U <- vector("list",n2mUnits)
  names(U)[1:nUnits] <- HN$links$ID
  if(is.na(w[1])) w <- rep(1,nUnits) else method <- "HiSetsW"
  if(norm) method <- paste(method,"N",sep="")
  for(j in 1:nUnits) {v <- rep(0,nNodes); v[H[[j]]] <- 1
    k <- max(1,sum(v)); u <- if(norm) v*(w[j]/k) else v*w[j]  
    U[[j]] <- list(L=v,R=u,f=1,s=w[j],p=0)}
  D <- matrix(nrow=nUnits,ncol=nUnits)
  for(p in 1:nmUnits) for(q in (p+1):nUnits) D[q,p] <- D[p,q] <- dist(U,p,q)
  diag(D) <- Inf
  active <- 1:nUnits; m <- matrix(nrow=nmUnits,ncol=2)
  node <- rep(0,nUnits); h <- numeric(nmUnits)
  for(j in npUnits:n2mUnits) U[[j]] <- list(L=NA,R=NA,f=1,s=1,p=0)
  names(U)[npUnits:n2mUnits] <- paste("L",1:nmUnits,sep="")
  for(k in 1:nmUnits){
    ind <- active[sapply(active,function(i) which.min(D[i,active]))]
    dd <- sapply(active,function(i) min(D[i,active]))
    pq <- which.min(dd)
    p<-active[pq]; q <- ind[pq]; h[k] <- D[p,q]
    if(node[p]==0){m[k,1] <- -p; ip <- p} else {m[k,1] <- node[p]; ip <- node[p]}
    if(node[q]==0){m[k,2] <- -q; iq <- q} else {m[k,2] <- node[q]; iq <- node[q]}
    ik <- nUnits + k
    U[[ik]] <- Lupdate(U,ip,iq)
    active <- setdiff(active,p)
    for(s in setdiff(active,q)){
      is <- ifelse(node[s]==0,s,node[s])
      D[s,q] <- D[q,s] <- dist(U,ik,is)
    }
    node[[q]] <- ik
  }
  for(i in 1:nmUnits) for(j in 1:2) if(m[i,j]>nUnits) m[i,j] <- m[i,j]-nUnits 
  hc <- list(merge=m,height=h,order=orDendro(nmUnits),labels=HN$links$ID,
    method=method,call=NULL,dist.method=dtype,leaders=U[npUnits:n2mUnits],
    attrs=HN$nodes$ID)
  class(hc) <- "hclust"
  return(hc)
}

cluster <- function(hc,q) {
  clu <- function(m,q) return(if(q < 0) -q else union(clu(m,m[q,1]),clu(m,m[q,2]))) 
  return(clu(hc$merge,q))
}

Running

Network tools and formats

> library(jsonlite)
> wdir <- "C:/Users/vlado/docs/papers/2023/SDA/Paris/test"
> setwd(wdir)
> SD <- fromJSON("NetFormats.json")
> str(SD)
> m <- SD$head$nUnits
> V <- data.frame(ID=SD$head$vars$V1$cats); n <- nrow(V)
> info <- list(network=SD$info$dataset,
+   title=SD$info$title, by=SD$info$by, href=SD$info$href,
+   creator="V. Batagelj", date=date(),
+   nNodes=n, nLinks=m, simple=NA )
> H <- data.frame(ID=trimws(rownames(SD$SDF)),E=rep(NA,m))
> E = vector(mode="list",m)
> for(i in 1:m) E[[i]] <- which(SD$SDF$Tools[[i]]==1) 
> H$E <- E
> HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list())
> str(HN)
> wdir <- "C:/Users/vlado/DL/data/hyper"
> setwd(wdir)
> write(toJSON(HN),"NetFormats.json")

> source("hyperNets.R")
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1)

Zoo

> wdir <- "C:/Users/vlado/DL/data/hyper"
> setwd(wdir)
> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> library(jsonlite)

> wdir <- "C:/Users/vlado/DL/data/hyper/All/AllSet_all_raw_data/zoo"
> setwd(wdir)
> HN <- fromJSON("Zoo.json")
> hc <- hyper.cluster(HN,dMembers)
> plot(hc,hang=-1)
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1)

FB organizations

Shortening some country names

> library(jsonlite)
> wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs"
> setwd(wdir)
> HN <- fromJSON("FB_orgsInc.json")
> HN$links$ID <- gsub("Island","Is",gsub("Islands","Iss",HN$links$ID))
> HN$links$ID <- gsub("Republic","R",gsub("SAR","/",HN$links$ID))
> HN$links$ID[[45]] <- "Cocos Iss"; HN$links$ID[[138]] <- "Micronesia"
> HN$links$ID[[33]] <- "Myanmar"
> write(toJSON(HN),"FB_inOrgs.json")
> str(HN,list.len=6)

Clustering

> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> library(jsonlite)
> # wdir <- "C:/Users/vlado/DL/data/hyper/CIA/orgs"
> # setwd(wdir)
> # HN <- fromJSON("FB_inOrgs.json")
> HN <- fromJSON("https://raw.githubusercontent.com/bavla/hypernets/main/data/FB_inOrgs.json")
> hc <- hyper.cluster(HN,dMembers)
> plot(hc,hang=-1,cex=0.4)
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1,cex=0.4)
> hc <- hyper.cluster(HN,pMembers,dtype="pSets",norm=TRUE,w=HN$links$pop/1000000)
> plot(hc,hang=-1,cex=0.4)

Analysis of clusters

> cluster <- function(hc,q) {
+   clu <- function(m,q) return(if(q < 0) -q else union(clu(m,m[q,1]),clu(m,m[q,2]))) 
+   return(clu(hc$merge,q))
+ }
> cluster(hc,10)
[1]  70  88 103 108 156 176

> top <- function(V,k) return(order(V,decreasing=TRUE)[1:k])
> desc <- function(hc,cl,k){
+   km <- min(k,sum(hc$leaders[[cl]]$L))
+   I <- top(hc$leaders[[cl]]$R,km)
+   T <- hc$leaders[[cl]]$R[I]; names(T) <- hc$attrs[I]
+   return(T)
+ }

> C <- cluster(hc,183)
> HN$links$ID[C]
[1] "Kazakhstan"   "Kyrgyzstan"   "Tajikistan"   "Armenia"      "Azerbaijan"   "Turkmenistan"
[7] "Uzbekistan"  
> desc(hc,183,20)
     ADB      CIS     EAPC     EBRD      FAO     IBRD     ICAO     ICRM      IDA      IFC    IFRCS 
1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 
     ILO      IMF Interpol      IOC      ISO      ITU     MIGA      NAM     OPCW 
1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874 1.721874  

Converting leader vectors into sets

> S <- vector("list",nmUnits)
> for(j in 1:nmUnits) S[[j]] <- which(hc$leaders[[j]]$L==1)
> hc$leaders[[5]]$L
  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [48] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0
 [95] 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
[142] 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
[189] 0 0 1 0 1 1 0 1 0
> S[[5]]
 [1]  33  68  89  90  91  92  99 107 108 111 114 140 142 144 163 167 169 174 186 191 193 194 196
vlado/work/hn/clu.txt · Last modified: 2023/11/12 17:52 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki