Pictures for the book

First we make in Pajek a hierarchical clustering with relational constraints:

  1. read usc3110+.paj file
  2. read vars42.vec
  3. Cluster/Create complete cluster [45]
  4. click View/Edit Cluster button and delete first 3 units
  5. Operations/Multiple Relations Network/Select Relation(s) into One Network [1,3]
  6. Operations/Network+Cluster/Dissimilarity*/Vector Based [Euclidean]
  7. Network/Create Hierarchy/Clustering with RC/Run [Maximum,Tolerant]
  8. save partition Clustering with relational constraint (tree) [Max/Tolerant] to MaxTolRS.clu
  9. save vector Clustering with relational constraint (heights) [Max/Tolerant] to heightMaxTolRS.vec
  10. save vector Clustering with relational constraint (size) [Max/Tolerant] to sizeMaxTolRS.vec

We continue in R:

> # March 27, 2014
> setwd("D:/Data/counties/pajek2")
> load("vars42std.Rdata")       
> Pajek2R <- function(cling){
+   tree <- as.integer(read.csv(cling,header=FALSE,skip=1)$V1)
+   N <- length(tree); n <- (N+1) %/% 2
+   merge <- matrix(0,nrow=(n-1),ncol=2)
+   for(i in 1:n) if(tree[i]>0){
+     k <- tree[i]-n
+     if(merge[k,1]==0) merge[k,1] <- -i else merge[k,2] <- -i
+   }
+   for(i in (n+1):N) if(tree[i]>0){
+     k <- tree[i]-n; j <- i-n
+     if(merge[k,1]==0) merge[k,1] <- j else merge[k,2] <- j
+   }
+   return(list(merge=merge,n=n))
+ }
> 
> orDendro <- function(m,i){if(i<0) return(-i)
+   return(c(orDendro(m,m[i,1]),orDendro(m,m[i,2])))}
> 
> orSize <- function(m,i){if(i<0) return(1)
+   s[i] <<- orSize(m,m[i,1])+orSize(m,m[i,2])
+   return(s[i])}
> 
> setwd("D:/Data/counties/book") 
> RM <- Pajek2R("MaxTolRS.clu")
> HM <- read.csv("heightMaxTolRS.vec",header=FALSE,skip=3111)[[1]]
> RM$height <- HM
> RM$method <- "Maximum/Tolerant:Rook+Sea"
> RM$dist.method <- "Euclidean"
> RM$labels <- rownames(U)
> class(RM) <- "hclust"
> RM$call <- "Pajek.data"
> source("D:\\Data\\counties\\pajek\\varCutree.R") 
> n <- 3110; nm <- n-1
> 
> a <- as.vector(RM$merge)
> b <- a[a>0]
> m <- min(which(RM$merge[,1]==0))
  no non-missing arguments to min; returning Inf
> c <- setdiff(1:(m-1),b)
Error in 1:(m - 1) : result would be too long a vector
> e <- -a[a<0]
> f <- setdiff(1:n,e)
> f
integer(0)
> for(i in 3100:3109) cat(i,RM$merge[i,],"\n")
3100 -270 3099 
3101 3057 3100 
3102 -2873 3101 
3103 -3055 3102 
3104 3083 3103 
3105 2951 3104 
3106 -383 3105 
3107 3090 3106 
3108 -1232 3107 
3109 -1825 3108 
> RM$order <- orDendro(RM$merge,nm)
> RM$labels <- rep(' ',n)
> pdf("BKdendMaxTolRS.pdf",width=58.5,height=41.5)
> plot(RM,hang=-1,cex.axis=4,main="Maximum/Tolerant",lwd=0.01,oma=c(5,5,5,5))
Error in cl[[2L]] : subscript out of bounds
> dev.off()
null device 
          1 
> hCutree <- function(R,h){
+   label <- function(node,nc){
+     left <- R$merge[node,1]; right <- R$merge[node,2]
+     if(nc==0){
+       if(R$height[node] <= h){
+         C <<- C+1; nc <- C
+       }
+     }
+     if(left <0){part[ -left] <<- nc} else label( left,nc) 
+     if(right<0){part[-right] <<- nc} else label(right,nc) 
+   }  
+   C <- 0; nc <- 0; nm <- nrow(R$merge); part <- rep(NA,nm+1)
+   label(nm,nc)
+   return(part)
+ }
> 
> derivedTree <- function(R,type='rank'){
+   if (type == 'total') { c <- 0; ex <- expression(a+b+R$height[i]) }
+   else if (type == 'count') { c <- 1; ex <- expression(a+b) }
+   else  { c <- 0; ex <- expression(1+max(a,b)) }
+   nm <- length(R$height)
+   h <- rep(c,nm)
+   for (i in 1:nm){
+     j <- R$merge[i,1]; a <- ifelse(j<0,c,h[j])
+     j <- R$merge[i,2]; b <- ifelse(j<0,c,h[j])
+     h[i] <- eval(ex)
+   }
+   return(h)
+ }
> 
> # 15 clusters
> 
> DM <- RM
> hh <- RM$height
> DM$height <- hh; DM$height <- derivedTree(DM,'rank')
> DM$order <- orDendro(DM$merge,nm)
> pM <- hCutree(DM,h=117); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
  37   67   15   17   42    0   30    8   39    5   45    9   58   41   48   65 
1168  668  167  151  147  127  109   61   61   47   43   28   28   23   16   16 
   7   33   16   24   34   38   52   53   32    2   11   44   27   55   60   40 
  14   13   12   11   11   11   11   10    9    7    7    7    6    6    6    5 
   4   20   21   23   35   49    1    3    6   12   13   14   18   19   26   29 
   4    4    4    4    4    4    3    3    3    3    3    3    3    3    3    3 
  36   47   54   57   59   68   10   22   25   28   31   43   46   50   51   56 
   3    3    3    3    3    3    2    2    2    2    2    2    2    2    2    2 
  61   62   63   64   66   69 
   2    2    2    2    2    2 
> pM <- hCutree(DM,h=116); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
  38   69   15   17   43    0   30    8   40   68    5   46    9   59   42   49 
1166  615  167  151  147  127  109   61   61   53   47   43   28   28   23   16 
  66    7   33   16   24   34   39   53   54   32    2   11   45   27   56   61 
  16   14   13   12   11   11   11   11   10    9    7    7    7    6    6    6 
  41    4   20   21   23   35   50    1    3    6   12   13   14   18   19   26 
   5    4    4    4    4    4    4    3    3    3    3    3    3    3    3    3 
  29   36   48   55   58   60   70   10   22   25   28   31   37   44   47   51 
   3    3    3    3    3    3    3    2    2    2    2    2    2    2    2    2 
  52   57   62   63   64   65   67   71 
   2    2    2    2    2    2    2    2 
> mali <- as.integer(names(fm)[fm < 20]) 
> ppM <- pM
> ppM[ppM %in% mali] <- 999 
> pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM))))
> table(pdM)
pdM
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
 127   47   61   28  167  151  109 1166   61   23  147   43   28   53  615  284 
> part <- pdM
> save(part,file="BKMaxTolRSRank15.Rdata")
> pdf("BKdendMaxTolRSRank.pdf",width=58.5,height=41.5)
> plot(DM,hang=-1,cex.axis=4,main="Maximum/Tolerant",lwd=0.01)
Error in cl[[2L]] : subscript out of bounds
> dev.off() 
null device 
          1 
> rankTree <- function(R){
+   nm <- length(R$height)
+   rank <- rep(0,nm)
+   for (i in 1:nm){
+     j <- R$merge[i,1]; a <- ifelse(j<0,0,rank[j])
+     j <- R$merge[i,2]; b <- ifelse(j<0,0,rank[j])
+     rank[i] <- 1+max(a,b)
+   }
+   return(rank)
+ }
>
> setwd("D:/Data/counties/9nations")
> load('pq.Rdata')
> library(maptools)
> gpclibPermit()
[1] FALSE
> USout <- readShapeSpatial("USA/USA_adm0.shp")
> USsta <- readShapeSpatial("USA/USA_adm1.shp")
> UScou <- readShapeSpatial("USA/USA_adm2.shp")
> col <- c("white","salmon1","blue","lightblue","lightpink","green4","orange","yellow",
+  "green","red","violet","sienna","orangered","darkorange3","deepskyblue","blueviolet")
> clu <- rep(NA,length(UScou$NAME_1))
> for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} 
> setwd("D:/Data/counties/book")
> pdf("BKMaxTolRSRank15.pdf",width=10,height=7)
> plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="grey",lwd=0.001,asp=1.3)
> plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE)
> # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1)
> title("Central US - 15 clusters"); dev.off() 
null device 
          1 
> 
> # 8 clusters
> 
> pM <- hCutree(DM,h=179); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
  17   23   15   20    8    0    5    9   19    7   16    2   11   22   18    4 
1629  881  167  147   61   49   47   28   23   14   12    7    7    7    5    4 
   1    3    6   12   13   14   10   21 
   3    3    3    3    3    3    2    2 
> pM <- hCutree(DM,h=178); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
  18   24   15   17   21    8    0    5    9   20    7   16    2   11   23   19 
1478  880  167  151  147   61   50   47   28   23   14   12    7    7    7    5 
   4    1    3    6   12   13   14   10   22 
   4    3    3    3    3    3    3    2    2 
> mali <- as.integer(names(fm)[fm < 40]) 
> ppM <- pM
> ppM[ppM %in% mali] <- 9999 
> pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM))))
> table(pdM)
pdM
   1    2    3    4    5    6    7    8    9 
  50   47   61  167  151 1478  147  880  129 
> part <- pdM
> save(part,file="BKMaxTolRSRank8.Rdata")
> clu <- rep(NA,length(UScou$NAME_1))
> for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]}  
> col <- c("white","salmon1","blue","lightblue","lightpink","green4","yellow","orange",
+  "blueviolet")
> pdf("BKMaxTolRSRank8.pdf",width=10,height=7)
> plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="grey",lwd=0.001,asp=1.3)
> plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE)
> # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1)
> title("Central US - 8 clusters"); dev.off()
null device 
          1 
> # 2clusters
>  
> pM <- hCutree(DM,h=215); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
   5    0    2    4    1    3 
3083   10    7    4    3    3  
> pM <- hCutree(DM,h=214); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
   5    6    0    2    4    1    3 
2002 1081   10    7    4    3    3 
> mali <- as.integer(names(fm)[fm < 10]) 
> ppM <- pM
> ppM[ppM %in% mali] <- 9999 
> pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM))))
> table(pdM)
pdM
   1    2    3    4 
  10 2002 1081   17 
> part <- pdM
> save(part,file="BKMaxTolRSRank2.Rdata")
> clu <- rep(NA,length(UScou$NAME_1))
> for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} 
> col <- c("white","grey85","grey55","grey30")
> pdf("BKMaxTolRSRank2.pdf",width=10,height=7)
> plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="white",lwd=0.001,asp=1.3)
> plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE)
> # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1)
> title("Central US - 2 clusters"); dev.off()
null device 
          1 
> 
> # 15 clusters  b
>
> pM <- hCutree(DM,h=114); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
  40   71   15   17   45    0   30    8   42   70    5   48    9   61   44   51 
1160  613  167  151  147  129  109   61   61   53   47   43   28   28   23   16 
  68    7   33   16   24   34   41   55   56   32    2   11   47   27   58   63 
  16   14   13   12   11   11   11   11   10    9    7    7    7    6    6    6 
  43    4   20   21   23   35   39   52    1    3    6   12   13   14   18   19 
   5    4    4    4    4    4    4    4    3    3    3    3    3    3    3    3 
  26   29   36   50   57   60   62   72   10   22   25   28   31   37   38   46 
   3    3    3    3    3    3    3    3    2    2    2    2    2    2    2    2 
  49   53   54   59   64   65   66   67   69   73 
   2    2    2    2    2    2    2    2    2    2 
> pM <- hCutree(DM,h=113); fm <- table(pM); sort(fm,decreasing=TRUE)
pM
 41  73  40  15  17  46   0  30   8  43  71   5  49   9  62  45  52  69   7  33 
648 609 512 167 151 147 129 109  61  61  53  47  43  28  28  23  16  16  14  13 
 16  24  34  42  56  57  32   2  11  48  27  59  64  44   4  20  21  23  35  39 
 12  11  11  11  11  10   9   7   7   7   6   6   6   5   4   4   4   4   4   4 
 53  72   1   3   6  12  13  14  18  19  26  29  36  51  58  61  63  74  10  22 
  4   4   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   3   2   2 
 25  28  31  37  38  47  50  54  55  60  65  66  67  68  70  75 
  2   2   2   2   2   2   2   2   2   2   2   2   2   2   2   2 
> mali <- as.integer(names(fm)[fm < 25]) 
> ppM <- pM
> ppM[ppM %in% mali] <- 9999 
> pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM))))
> table(pdM)
pdM
  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 
129  47  61  28 167 151 109 512 648  61 147  43  28  53 609 317 
> part <- pdM
> setwd("D:/Data/counties/book")
> save(part,file="BKMaxTolRSRank15b.Rdata")
> col <- c("white","salmon1","blue","lightblue","lightpink","green4","orange","yellow",
+  "green","red","violet","sienna","orangered","darkorange3","deepskyblue","blueviolet")
> clu <- rep(NA,length(UScou$NAME_1))
> for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} 
> pdf("BKMaxTolRSRank15b.pdf",width=10,height=7)
> plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="grey",lwd=0.001,asp=1.3)
> plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE)
> # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1)
> title("Central US - 15 clusters"); dev.off() 
null device 
          1 
> 
notes/clu/counties/bk.txt · Last modified: 2017/04/12 15:58 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki