Leaders clustering algorithm

Leaders algorithm

Adapted from Clamix

distul <- function(X,Y){
  P <- X$R; Q <- Y$L
  R <- P+Q; M <- max(R); t <- Y$L # t <- as.integer(2*R >= M)
  return(sum((1-t)*R + t*(M-R)))
}

hyper.leader <- function(HN,maxL,trace=2,tim=1,empty=0,clust=NULL,w=NA,norm=FALSE){
  nUnits <- nrow(HN$links); nmUnits <- nUnits-1; method <- "Leader"
  nNodes <- nrow(HN$nodes)
  v <- rep(0,nNodes); temp <- list(L=v,R=v,f=0,s=0,p=0)  
  H <- HN$links$E; U <- vector("list",nUnits)
  names(U)[1:nUnits] <- HN$links$ID
  if(is.na(w[1])) w <- rep(1,nUnits) else method <- "LeaderW"
  if(norm) method <- paste(method,"N",sep="")
  for(j in 1:nUnits) {v <- rep(0,nNodes); v[H[[j]]] <- 1
    k <- max(1,sum(v)); u <- if(norm) v*(w[j]/k) else v*w[j]  
    U[[j]] <- list(L=v,R=u,f=1,s=w[j],p=0)}
    
  L <- vector("list",maxL); Ro <- numeric(maxL); K <- integer(maxL)
# if not given, random partition into maxL clusters
  if(is.null(clust)) clust <- sample(1:(maxL-empty),nUnits,replace=TRUE)
  step <- 0 
  cat("Hypernets / leader",date(),"\n\n"); flush.console()
  repeat {
    step <- step+1; K <- 0
  # new leaders - determine the leaders of clusters in current partition
    for(k in 1:maxL){L[[k]] <- temp; names(L)[[k]] <- paste("L",k,sep="")}
    for(i in 1:nUnits){j <- clust[[i]]; L[[j]]$R <- L[[j]]$R + U[[i]]$R }
    for(k in 1:maxL){M <- max(L[[k]]$R); L[[k]]$L <- as.integer(2*L[[k]]$R >= M)}
  # new partition - assign each unit to the nearest new leader
  # for(k in 1:maxL) print(L[[k]]$L); flush.console() # TEST
    clust <- integer(nUnits)
    R <- numeric(maxL); p <- double(maxL)
    for(i in 1:nUnits){d <- double(maxL)
      for(k in 1:maxL){d[[k]] <- distul(U[[i]],L[[k]])}
      r <- min(d); j <- which(d==r)
      if(is.infinite(r)){cat("Infinite unit=",i,"\n"); print(U[[i]]); flush.console()}
      if(length(j)==0){
        cat("unit",i,"\n",d,"\n"); flush.console(); print(U[[i]]); flush.console()
        u <- which(is.na(d))[[1]]; cat("leader",u,"\n"); print(L[[u]])
        stop()}
      j <- which(d==r)[[1]];
      clust[[i]] <- j; p[[j]] <- p[[j]] + r; L[[j]]$f <- L[[j]]$f + 1
      if(R[[j]]<r) {R[[j]] <- r; K[[j]] <- i}
    }
  # report
    cat("\nStep",step,date(),"\n"); flush.console()
    if(trace>1) {print(table(clust)); print(R); print(Ro-R); print(p)} 
    print(sum(p)); flush.console()
    if(sum(abs(Ro-R))<0.0000001) break
    Ro <- R; tim <- tim-1
    if(tim<1){
      ans <- readline("Times repeat = ")
      tim <- as.integer(ans); if (tim < 1)  break
    }
  # in the case of empty clusters use the most distant SOs as seeds
    t <- table(clust)
    em <- setdiff(1:maxL,as.integer(names(t)))
    if(length(em)>0){
      cat("*** empty clusters",em,":"); lem <- length(em)
      if(2*lem>maxL){cat(" more than half clusters\n"); stop()}
      iem <- K[rev(order(R))[1:lem]]; clust[iem] <- em
      cat(" Units",iem," used as seeds\n"); flush.console()
    }
  }
  return (list(clust=clust,leaders=L,R=R,p=p))
}

Running

Test on NetFormats

> source("https://raw.githubusercontent.com/bavla/hypernets/main/R/hyperNets.R")
> library(jsonlite)
> reportCl <- function(HN,Cl){
+   C <- Cl$clust; LN <- names(Cl$leaders); nl <- length(LN)
+   for(k in 1:nl)cat("C",k," r =",Cl$R[k],"f =",Cl$leaders[[k]]$f,
+     "p =",Cl$p[k],"\n",LN[k],":",HN$nodes$ID[Cl$leaders[[k]]$L==1],
+     "\nC:",HN$links$ID[which(C==k)],"\n\n")
+ }

> HN <- fromJSON("https://raw.githubusercontent.com/bavla/hypernets/main/data/NetFormats.json")
> hc <- hyper.cluster(HN,pMembers)
> plot(hc,hang=-1)

> source("leader.R")
> Cl <- hyper.leader(HN,3)
> reportCl(HN,Cl)

Zoo

> HZ <- fromJSON("https://raw.githubusercontent.com/bavla/hypernets/main/data/Zoo.json")
> hz <- hyper.cluster(HZ,pMembers)
> plot(hz,hang=-1,cex=0.8)
> Cz <- hyper.leader(HZ,5)
> reportCl(HZ,Cz)
C 1  r = 4 f = 37 p = 47 
 L1 : hair milk predator toothed backbone breathes tail catsize 
C: aardvark antelope bear boar buffalo calf cavy cheetah deer elephant fruitbat giraffe girl goat gorilla hamster hare leopard lion lynx mink mole
 mongoose opossum oryx platypus polecat pony puma pussycat raccoon reindeer squirrel vampire vole wallaby wolf 

C 2  r = 4 f = 21 p = 31 
 L2 : feathers eggs airborne backbone breathes tail 
C: chicken crow dove duck flamingo gull hawk kiwi lark ostrich parakeet penguin pheasant rhea skimmer skua sparrow swan tortoise vulture wren 

C 3  r = 5 f = 27 p = 62 
 L3 : eggs aquatic predator toothed backbone fins tail 
C: bass carp catfish chub crab crayfish dogfish frog Frog haddock herring lobster newt octopus pike piranha pitviper seahorse seasnake seawasp 
slowworm sole starfish stingray toad tuatara tuna 

C 4  r = 5 f = 12 p = 20 
 L4 : eggs airborne breathes 
C: clam flea gnat honeybee housefly ladybird moth scorpion slug termite wasp worm 

C 5  r = 1 f = 4 p = 3 
 L5 : hair milk aquatic predator toothed backbone breathes fins tail catsize 
C: dolphin porpoise seal sealion 

> 
vlado/work/hn/lead.txt · Last modified: 2023/11/12 23:11 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki