First we make in Pajek a hierarchical clustering with relational constraints:
usc3110+.paj
filevars42.vec
Cluster/Create complete cluster [45]
Operations/Multiple Relations Network/Select Relation(s) into One Network [1,3]
Operations/Network+Cluster/Dissimilarity*/Vector Based [Euclidean]
Network/Create Hierarchy/Clustering with RC/Run [Maximum,Tolerant]
MaxTolRS.clu
heightMaxTolRS.vec
sizeMaxTolRS.vec
We continue in R:
> # March 27, 2014 > setwd("D:/Data/counties/pajek2") > load("vars42std.Rdata") > Pajek2R <- function(cling){ + tree <- as.integer(read.csv(cling,header=FALSE,skip=1)$V1) + N <- length(tree); n <- (N+1) %/% 2 + merge <- matrix(0,nrow=(n-1),ncol=2) + for(i in 1:n) if(tree[i]>0){ + k <- tree[i]-n + if(merge[k,1]==0) merge[k,1] <- -i else merge[k,2] <- -i + } + for(i in (n+1):N) if(tree[i]>0){ + k <- tree[i]-n; j <- i-n + if(merge[k,1]==0) merge[k,1] <- j else merge[k,2] <- j + } + return(list(merge=merge,n=n)) + } > > orDendro <- function(m,i){if(i<0) return(-i) + return(c(orDendro(m,m[i,1]),orDendro(m,m[i,2])))} > > orSize <- function(m,i){if(i<0) return(1) + s[i] <<- orSize(m,m[i,1])+orSize(m,m[i,2]) + return(s[i])} > > setwd("D:/Data/counties/book") > RM <- Pajek2R("MaxTolRS.clu") > HM <- read.csv("heightMaxTolRS.vec",header=FALSE,skip=3111)[[1]] > RM$height <- HM > RM$method <- "Maximum/Tolerant:Rook+Sea" > RM$dist.method <- "Euclidean" > RM$labels <- rownames(U) > class(RM) <- "hclust" > RM$call <- "Pajek.data" > source("D:\\Data\\counties\\pajek\\varCutree.R") > n <- 3110; nm <- n-1 > > a <- as.vector(RM$merge) > b <- a[a>0] > m <- min(which(RM$merge[,1]==0)) no non-missing arguments to min; returning Inf > c <- setdiff(1:(m-1),b) Error in 1:(m - 1) : result would be too long a vector > e <- -a[a<0] > f <- setdiff(1:n,e) > f integer(0) > for(i in 3100:3109) cat(i,RM$merge[i,],"\n") 3100 -270 3099 3101 3057 3100 3102 -2873 3101 3103 -3055 3102 3104 3083 3103 3105 2951 3104 3106 -383 3105 3107 3090 3106 3108 -1232 3107 3109 -1825 3108 > RM$order <- orDendro(RM$merge,nm) > RM$labels <- rep(' ',n) > pdf("BKdendMaxTolRS.pdf",width=58.5,height=41.5) > plot(RM,hang=-1,cex.axis=4,main="Maximum/Tolerant",lwd=0.01,oma=c(5,5,5,5)) Error in cl[[2L]] : subscript out of bounds > dev.off() null device 1 > hCutree <- function(R,h){ + label <- function(node,nc){ + left <- R$merge[node,1]; right <- R$merge[node,2] + if(nc==0){ + if(R$height[node] <= h){ + C <<- C+1; nc <- C + } + } + if(left <0){part[ -left] <<- nc} else label( left,nc) + if(right<0){part[-right] <<- nc} else label(right,nc) + } + C <- 0; nc <- 0; nm <- nrow(R$merge); part <- rep(NA,nm+1) + label(nm,nc) + return(part) + } > > derivedTree <- function(R,type='rank'){ + if (type == 'total') { c <- 0; ex <- expression(a+b+R$height[i]) } + else if (type == 'count') { c <- 1; ex <- expression(a+b) } + else { c <- 0; ex <- expression(1+max(a,b)) } + nm <- length(R$height) + h <- rep(c,nm) + for (i in 1:nm){ + j <- R$merge[i,1]; a <- ifelse(j<0,c,h[j]) + j <- R$merge[i,2]; b <- ifelse(j<0,c,h[j]) + h[i] <- eval(ex) + } + return(h) + } > > # 15 clusters > > DM <- RM > hh <- RM$height > DM$height <- hh; DM$height <- derivedTree(DM,'rank') > DM$order <- orDendro(DM$merge,nm) > pM <- hCutree(DM,h=117); fm <- table(pM); sort(fm,decreasing=TRUE) pM 37 67 15 17 42 0 30 8 39 5 45 9 58 41 48 65 1168 668 167 151 147 127 109 61 61 47 43 28 28 23 16 16 7 33 16 24 34 38 52 53 32 2 11 44 27 55 60 40 14 13 12 11 11 11 11 10 9 7 7 7 6 6 6 5 4 20 21 23 35 49 1 3 6 12 13 14 18 19 26 29 4 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 36 47 54 57 59 68 10 22 25 28 31 43 46 50 51 56 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 61 62 63 64 66 69 2 2 2 2 2 2 > pM <- hCutree(DM,h=116); fm <- table(pM); sort(fm,decreasing=TRUE) pM 38 69 15 17 43 0 30 8 40 68 5 46 9 59 42 49 1166 615 167 151 147 127 109 61 61 53 47 43 28 28 23 16 66 7 33 16 24 34 39 53 54 32 2 11 45 27 56 61 16 14 13 12 11 11 11 11 10 9 7 7 7 6 6 6 41 4 20 21 23 35 50 1 3 6 12 13 14 18 19 26 5 4 4 4 4 4 4 3 3 3 3 3 3 3 3 3 29 36 48 55 58 60 70 10 22 25 28 31 37 44 47 51 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 52 57 62 63 64 65 67 71 2 2 2 2 2 2 2 2 > mali <- as.integer(names(fm)[fm < 20]) > ppM <- pM > ppM[ppM %in% mali] <- 999 > pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM)))) > table(pdM) pdM 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 127 47 61 28 167 151 109 1166 61 23 147 43 28 53 615 284 > part <- pdM > save(part,file="BKMaxTolRSRank15.Rdata") > pdf("BKdendMaxTolRSRank.pdf",width=58.5,height=41.5) > plot(DM,hang=-1,cex.axis=4,main="Maximum/Tolerant",lwd=0.01) Error in cl[[2L]] : subscript out of bounds > dev.off() null device 1 > rankTree <- function(R){ + nm <- length(R$height) + rank <- rep(0,nm) + for (i in 1:nm){ + j <- R$merge[i,1]; a <- ifelse(j<0,0,rank[j]) + j <- R$merge[i,2]; b <- ifelse(j<0,0,rank[j]) + rank[i] <- 1+max(a,b) + } + return(rank) + } > > setwd("D:/Data/counties/9nations") > load('pq.Rdata') > library(maptools) > gpclibPermit() [1] FALSE > USout <- readShapeSpatial("USA/USA_adm0.shp") > USsta <- readShapeSpatial("USA/USA_adm1.shp") > UScou <- readShapeSpatial("USA/USA_adm2.shp") > col <- c("white","salmon1","blue","lightblue","lightpink","green4","orange","yellow", + "green","red","violet","sienna","orangered","darkorange3","deepskyblue","blueviolet") > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} > setwd("D:/Data/counties/book") > pdf("BKMaxTolRSRank15.pdf",width=10,height=7) > plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="grey",lwd=0.001,asp=1.3) > plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE) > # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US - 15 clusters"); dev.off() null device 1 > > # 8 clusters > > pM <- hCutree(DM,h=179); fm <- table(pM); sort(fm,decreasing=TRUE) pM 17 23 15 20 8 0 5 9 19 7 16 2 11 22 18 4 1629 881 167 147 61 49 47 28 23 14 12 7 7 7 5 4 1 3 6 12 13 14 10 21 3 3 3 3 3 3 2 2 > pM <- hCutree(DM,h=178); fm <- table(pM); sort(fm,decreasing=TRUE) pM 18 24 15 17 21 8 0 5 9 20 7 16 2 11 23 19 1478 880 167 151 147 61 50 47 28 23 14 12 7 7 7 5 4 1 3 6 12 13 14 10 22 4 3 3 3 3 3 3 2 2 > mali <- as.integer(names(fm)[fm < 40]) > ppM <- pM > ppM[ppM %in% mali] <- 9999 > pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM)))) > table(pdM) pdM 1 2 3 4 5 6 7 8 9 50 47 61 167 151 1478 147 880 129 > part <- pdM > save(part,file="BKMaxTolRSRank8.Rdata") > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} > col <- c("white","salmon1","blue","lightblue","lightpink","green4","yellow","orange", + "blueviolet") > pdf("BKMaxTolRSRank8.pdf",width=10,height=7) > plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="grey",lwd=0.001,asp=1.3) > plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE) > # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US - 8 clusters"); dev.off() null device 1 > # 2clusters > > pM <- hCutree(DM,h=215); fm <- table(pM); sort(fm,decreasing=TRUE) pM 5 0 2 4 1 3 3083 10 7 4 3 3 > pM <- hCutree(DM,h=214); fm <- table(pM); sort(fm,decreasing=TRUE) pM 5 6 0 2 4 1 3 2002 1081 10 7 4 3 3 > mali <- as.integer(names(fm)[fm < 10]) > ppM <- pM > ppM[ppM %in% mali] <- 9999 > pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM)))) > table(pdM) pdM 1 2 3 4 10 2002 1081 17 > part <- pdM > save(part,file="BKMaxTolRSRank2.Rdata") > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} > col <- c("white","grey85","grey55","grey30") > pdf("BKMaxTolRSRank2.pdf",width=10,height=7) > plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="white",lwd=0.001,asp=1.3) > plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE) > # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US - 2 clusters"); dev.off() null device 1 > > # 15 clusters b > > pM <- hCutree(DM,h=114); fm <- table(pM); sort(fm,decreasing=TRUE) pM 40 71 15 17 45 0 30 8 42 70 5 48 9 61 44 51 1160 613 167 151 147 129 109 61 61 53 47 43 28 28 23 16 68 7 33 16 24 34 41 55 56 32 2 11 47 27 58 63 16 14 13 12 11 11 11 11 10 9 7 7 7 6 6 6 43 4 20 21 23 35 39 52 1 3 6 12 13 14 18 19 5 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 26 29 36 50 57 60 62 72 10 22 25 28 31 37 38 46 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 49 53 54 59 64 65 66 67 69 73 2 2 2 2 2 2 2 2 2 2 > pM <- hCutree(DM,h=113); fm <- table(pM); sort(fm,decreasing=TRUE) pM 41 73 40 15 17 46 0 30 8 43 71 5 49 9 62 45 52 69 7 33 648 609 512 167 151 147 129 109 61 61 53 47 43 28 28 23 16 16 14 13 16 24 34 42 56 57 32 2 11 48 27 59 64 44 4 20 21 23 35 39 12 11 11 11 11 10 9 7 7 7 6 6 6 5 4 4 4 4 4 4 53 72 1 3 6 12 13 14 18 19 26 29 36 51 58 61 63 74 10 22 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 25 28 31 37 38 47 50 54 55 60 65 66 67 68 70 75 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 > mali <- as.integer(names(fm)[fm < 25]) > ppM <- pM > ppM[ppM %in% mali] <- 9999 > pdM <- as.integer(factor(as.character(ppM),levels=names(table(ppM)))) > table(pdM) pdM 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 129 47 61 28 167 151 109 512 648 61 147 43 28 53 609 317 > part <- pdM > setwd("D:/Data/counties/book") > save(part,file="BKMaxTolRSRank15b.Rdata") > col <- c("white","salmon1","blue","lightblue","lightpink","green4","orange","yellow", + "green","red","violet","sienna","orangered","darkorange3","deepskyblue","blueviolet") > clu <- rep(NA,length(UScou$NAME_1)) > for(v in 1:3110) {i <- q[p[v]]; if(!is.na(i)) clu[i] <- part[v]} > pdf("BKMaxTolRSRank15b.pdf",width=10,height=7) > plot(UScou,xlim=c(-124,-67),ylim=c(25,49),col=col[clu],bg="grey97",border="grey",lwd=0.001,asp=1.3) > plot(USsta,xlim=c(-124,-67),ylim=c(25,49),lwd=0.005,border="black",asp=1.3,add=TRUE) > # text(coordinates(UScou),labels=as.character(UScou$NAME_2),cex=0.1) > title("Central US - 15 clusters"); dev.off() null device 1 >