====== Ordinary clustering ====== January 19, 2018 ===== Reading Pajek's clusterings and networks =====


read_Pajek_clu <- function(f,skip=1){
# reads a partition from Pajek's clu file; skip *vertices and comments lines
  read.table(f,skip=skip,colClasses=c("integer"),header=FALSE)$V1
}

read_Pajek_net <- function(f,skip=0){
# reads a network from Pajek's net file; skip comments lines
   L <- readLines(f)
   st <- grep("\\*",L)
   S <- unlist(strsplit(L[st[1]]," "))
   n <- as.integer(S[2]); n1 <- st[1]+1; n2 <- st[2]-1
   m1 <- st[2]+1; m2 <- length(L); m <- m2-m1+1
   Names <- unlist(strsplit(L[n1:n2],'"'))[3*(1:n)-1]
   R <- vector(mode="list",length=n)
   S <- unlist(strsplit(L[m1:m2],'[[:space:]]+'))
   b <- as.integer(S[4*(1:m)-2]); e <- as.integer(S[4*(1:m)-1])
   r <- 1; k <- 0; row <- NULL
   repeat {
      k <- k+1
      if((k>m)|(b[k]!=r)){
         R[[r]] <- row
         if(k>m) break
         r <- b[k]; row <- NULL
      }
      row <- c(row,e[k])
   }
   names(R) <- Names
   return(R)
}


wdir <- "C:/Users/batagelj/work/clamix/relC"
setwd(wdir)
a <- scan("C:/Users/batagelj/work/Delphi/Cluse/Cluse/data/RelCon/SomeTy.dis")
s <- length(a); n <- round((-1+sqrt(1+8*s))/2); nm <- n-1
D <- matrix(0, nrow=n, ncol=n); D[lower.tri(D,diag=TRUE)] <- a; D <- D+t(D)
netRel <- "C:/Users/batagelj/work/Delphi/Cluse/Cluse/data/RelCon/SomeTyXY.net"
R <- read_Pajek_net(netRel,3)
rownames(D) <- colnames(D) <- names(R); print(D)
DD <- as.dist(D); print(DD)


  a b c d e f g h i j
a 0 5 7 4 6 6 2 4 2 3
b 5 0 8 1 3 4 4 5 3 4
c 7 8 0 4 5 7 9 3 2 5
d 4 1 4 0 3 2 4 8 6 3
e 6 3 5 3 0 6 4 6 5 7
f 6 4 7 2 6 0 6 8 5 7
g 2 4 9 4 4 6 0 4 8 2
h 4 5 3 8 6 8 4 0 3 4
i 2 3 2 6 5 5 8 3 0 5
j 3 4 5 3 7 7 2 4 5 0

  a b c d e f g h i
b 5                
c 7 8              
d 4 1 4            
e 6 3 5 3          
f 6 4 7 2 6        
g 2 4 9 4 4 6      
h 4 5 3 8 6 8 4    
i 2 3 2 6 5 5 8 3  
j 3 4 5 3 7 7 2 4 5

===== Clustering =====


hRelClust <- function(D,method="max"){
  orDendro <- function(i){if(i<0) return(-i)
    return(c(orDendro(m[i,1]),orDendro(m[i,2])))}
  numL <- nrow(D); numLm <- numL-1
# each unit is a cluster; compute inter-cluster dissimilarity matrix
  diag(D) <- Inf
  print(D); flush.console()
  N <- rownames(D)
  active <- 1:numL; m <- matrix(nrow=numLm,ncol=2)
  node <- rep(0,numL); h <- numeric(numLm); w <- rep(1,numL)
  for(k in 1:numLm){
  # determine the closest pair of clusters (p,q)
    ind <- active[sapply(active,function(i) which.min(D[i,active]))]
    dd <- sapply(active,function(i) min(D[i,active]))
    print(active); print(ind); print(dd)
    pq <- which.min(dd)
  # join the closest pair of clusters
    p<-active[pq]; q <- ind[pq]; h[k] <- D[p,q]
    cat(pq,'join ',p,'/',N[p],' and ',q,'/',N[q],' at level ',D[p,q],'\n')
    if(node[p]==0) m[k,1] <- -p else m[k,1] <- node[p]
    if(node[q]==0) m[k,2] <- -q else m[k,2] <- node[q]
    active <- setdiff(active,p)
  # determine dissimilarities to the new cluster
    for(s in setdiff(active,q)){
      if(method=="max") D[q,s] <- max(D[q,s],D[p,s]) else
      if(method=="min") D[q,s] <- min(D[q,s],D[p,s]) else
      if(method=="ward") { ww <- w[p]+w[q]+w[s]
        D[q,s] <- ((w[q]+w[s])*D[q,s] + (w[p]+w[s])*D[p,s] - w[s]*h[k])/ww
      } else {cat('unknown method','\n'); return(NULL)}
      D[s,q] <- D[q,s]
    }
    w[q] <- w[q]+w[p]; node[[q]] <- k
  }
  hc <- list(merge=m,height=h,order=orDendro(numLm),labels=rownames(D),
    method=method,call=NULL,dist.method=NULL,leaders=NULL)
  class(hc) <- "hclust"
  return(hc)
}

===== Test ===== ==== Maximum ====


r <- hRelClust(D,method="max")
plot(r,hang=-1,main="max")
t <- hclust(DD,method="complete")
plot(t,hang=-1,main="complete")


    a   b   c   d   e   f   g   h   i   j
a Inf   5   7   4   6   6   2   4   2   3
b   5 Inf   8   1   3   4   4   5   3   4
c   7   8 Inf   4   5   7   9   3   2   5
d   4   1   4 Inf   3   2   4   8   6   3
e   6   3   5   3 Inf   6   4   6   5   7
f   6   4   7   2   6 Inf   6   8   5   7
g   2   4   9   4   4   6 Inf   4   8   2
h   4   5   3   8   6   8   4 Inf   3   4
i   2   3   2   6   5   5   8   3 Inf   5
j   3   4   5   3   7   7   2   4   5 Inf
 [1]  1  2  3  4  5  6  7  8  9 10
 [1] 7 4 9 2 2 4 1 3 1 7
 [1] 2 1 2 1 3 2 2 3 2 2
2 join  2 / b  and  4 / d  at level  1 
[1]  1  3  4  5  6  7  8  9 10
[1] 7 9 5 4 4 1 3 1 7
[1] 2 2 3 3 4 2 3 2 2
1 join  1 / a  and  7 / g  at level  2 
[1]  3  4  5  6  7  8  9 10
[1]  9  5  4  4 10  3  3  7
[1] 2 3 3 4 3 3 2 3
1 join  3 / c  and  9 / i  at level  2 
[1]  4  5  6  7  8  9 10
[1]  5  4  4 10  9  8  7
[1] 3 3 4 3 3 3 3
1 join  4 / d  and  5 / e  at level  3 
[1]  5  6  7  8  9 10
[1]  6  5 10  9  8  7
[1] 6 6 3 3 3 3
3 join  7 / g  and  10 / j  at level  3 
[1]  5  6  8  9 10
[1] 6 5 9 8 8
[1] 6 6 3 3 4
3 join  8 / h  and  9 / i  at level  3 
[1]  5  6  9 10
[1] 6 5 5 5
[1] 6 6 8 7
1 join  5 / e  and  6 / f  at level  6 
[1]  6  9 10
[1] 10  6  6
[1] 7 8 7
1 join  6 / f  and  10 / j  at level  7 
[1]  9 10
[1] 10  9
[1] 9 9
1 join  9 / i  and  10 / j  at level  9

{{pro:pics:max.png}} {{pro:pics:complete.png}} ==== Ward ====


r <- hRelClust(D,method="ward")
plot(r,hang=-1,main="ward")
t <- hclust(DD,method="ward.D")
plot(t,hang=-1,main="ward.D")


    a   b   c   d   e   f   g   h   i   j
a Inf   5   7   4   6   6   2   4   2   3
b   5 Inf   8   1   3   4   4   5   3   4
c   7   8 Inf   4   5   7   9   3   2   5
d   4   1   4 Inf   3   2   4   8   6   3
e   6   3   5   3 Inf   6   4   6   5   7
f   6   4   7   2   6 Inf   6   8   5   7
g   2   4   9   4   4   6 Inf   4   8   2
h   4   5   3   8   6   8   4 Inf   3   4
i   2   3   2   6   5   5   8   3 Inf   5
j   3   4   5   3   7   7   2   4   5 Inf
 [1]  1  2  3  4  5  6  7  8  9 10
 [1] 7 4 9 2 2 4 1 3 1 7
 [1] 2 1 2 1 3 2 2 3 2 2
2 join  2 / b  and  4 / d  at level  1 
[1]  1  3  4  5  6  7  8  9 10
[1] 7 9 5 4 4 1 3 1 7
[1] 2.000000 2.000000 3.666667 3.666667 3.666667 2.000000 3.000000 2.000000
[9] 2.000000
1 join  1 / a  and  7 / g  at level  2 
[1]  3  4  5  6  7  8  9 10
[1]  9  5  4  4 10  3  3  7
[1] 2.000000 3.666667 3.666667 3.666667 2.666667 3.000000 2.000000 2.666667
1 join  3 / c  and  9 / i  at level  2 
[1]  4  5  6  7  8  9 10
[1]  5  4  4 10  9  8  7
[1] 3.666667 3.666667 3.666667 2.666667 3.333333 3.333333 2.666667
4 join  7 / g  and  10 / j  at level  2.666667 
[1]  4  5  6  8  9 10
[1] 5 4 4 9 8 8
[1] 3.666667 3.666667 3.666667 3.333333 3.333333 4.833333
4 join  8 / h  and  9 / i  at level  3.333333 
[1]  4  5  6  9 10
[1] 5 4 4 5 4
[1] 3.666667 3.666667 3.666667 6.666667 7.133333
1 join  4 / d  and  5 / e  at level  3.666667 
[1]  5  6  9 10
[1] 6 5 6 6
[1] 4.833333 4.833333 8.666667 8.333333
1 join  5 / e  and  6 / f  at level  4.833333 
[1]  6  9 10
[1] 10 10  6
[1] 10.40476 11.00000 10.40476
1 join  6 / f  and  10 / j  at level  10.40476 
[1]  9 10
[1] 10  9
[1] 12.49524 12.49524
1 join  9 / i  and  10 / j  at level  12.49524

{{pro:pics:ward.png}} {{pro:pics:wardd.png}} ====== ====== \\ [[pro:relc|Back to Relational constraints]]