Clustering of European countries

Clustering of European countries

March 11, 2018

Ordinary clustering - without constraint

> eu <- "C:/Users/batagelj/work/Delphi/Cluse/Cluse/data/Europe/Europe.csv"  # location of the data file
> T <- read.csv2(eu,skip=2,row.names=25,dec=".")  # skip the first 2 lines; column 25 gives the row names; "." is the decimal mark. NOTE(review): the name T masks R's built-in T (alias of TRUE)
> str(T)  # compact overview of the data frame that was read
'data.frame':   27 obs. of  24 variables:
 $ area        : num  11100 32375 11781 42823 49373 ...
 $ population  : num  2650 7620 9890 8900 15100 ...
 $ indent      : num  1 2 2 2 2 3 3 1 2 2 ...
 $ percUrban   : num  34 54 87.1 58.7 66.7 80 59 71 75.4 86 ...
 $ density     : num  239 235 839 208 306 ...
 $ popFirstCity: num  6.42 22.31 10.83 10.84 7.69 ...
 $ incomPC     : num  647 5491 7085 2799 4673 ...
 $ percIndust  : num  61.5 61 30 43 45 35 30 38 54 54.6 ...
 $ birthRate   : num  33.3 11 12.3 16.1 18.7 12.2 13.9 14 13.3 9.6 ...
 $ deathRate   : num  8.1 13.5 11.4 10.7 11.5 9.9 9.4 10.1 13.4 11.8 ...
 $ lifeExp     : num  69 73 71 71 71 73 72 72 72 71 ...
 $ inPHBed     : num  164 88 112 116 99 103 66 98 92 87 ...
 $ inPPhysic   : num  159 479 530 465 418 624 703 681 538 516 ...
 $ infMort     : num  86.8 16.9 14 23.7 19.6 8.9 12 11.4 13.1 17.4 ...
 $ illiter     : num  25 0 3 5 0 0 0 0 0 0 ...
 $ higEduc     : num  10.8 12.6 15.2 14.4 10.3 ...
 $ roads       : num  378 629 651 439 925 ...
 $ vehicles    : num  0.381 30.013 30.657 0.438 12.934 ...
 $ cars        : num  0.098 23.99 27.685 0.128 11.106 ...
 $ railway     : num  16.9 126.1 218.6 88.3 166.7 ...
 $ radio       : num  6.79 28.68 40.89 30.9 26.01 ...
 $ tv          : num  0.17 23.25 26.75 17.37 25.12 ...
 $ telephon    : num  0.383 29.934 29.818 9.584 18.166 ...
 $ newspaper   : num  46 320 239 232 300 341 537 214 472 289 ...
> S <- scale(T[,4:24])  # centre and scale columns 4-24 (the first three columns are left out)
> E <- dist(S,diag=TRUE)  # Euclidean distances (dist default); diag=TRUE only affects how E is printed
> g <- hclust(E)  # hierarchical clustering with the default method ("complete")
> plot(g,hang=-1,main="European countries")  # negative hang: labels hang down from 0

Clustering with relational constraints (RC) based on a dictionary

> # Full distance matrix from the dist object E; n is the number of units.
> # np is not used in this listing - presumably read by the sourced code; verify.
> D <- as.matrix(E); n <- nrow(D); np <- n+1
> # Read the relation from a Pajek .net file (read_Pajek_net is defined elsewhere).
> euRel <- "C:/Users/batagelj/work/Delphi/Cluse/Cluse/data/Europe/EuropeXY.net"
> R <- read_Pajek_net(euRel,2)
> rownames(D) <- colnames(D) <- names(R)
> # Dictionary hD: for every pair (i,j) listed in R store D[i,j] under key(i,j).
> hD <- new.env()
> for(i in seq_along(R)) for(j in R[[i]]) assign(key(i,j),D[i,j],envir=hD)
> attr(hD,"Size") <- n; attr(hD,"Labels") <- names(R)
> # Mark units with an empty (NULL) list by 0.
> for(i in seq_along(R)) if(is.null(R[[i]])) R[[i]] <- 0
> # Ro is R itself; Ri is the reversed relation: i is added to Ri[[j]] for each j in R[[i]].
> Ro <- R; Ri <- vector("list",length(R))
> names(Ri)<-names(R)
> for(i in seq_along(R)) for(j in R[[i]]) if(j>0) Ri[[j]] <- union(Ri[[j]],i)
> for(i in seq_along(Ri)) if(is.null(Ri[[i]])) Ri[[i]] <- 0
> # Keep copies so that a later run can be started from the same state.
> sRi <- Ri; sRo <- Ro; sD <- D
> Ri <- sRi; Ro <- sRo; D <- sD
> source("C:\\Users\\batagelj\\work\\R\\RelCon\\relConH.R")
> # Keys currently stored in the dictionary.
> ls(hD)
 [1] "100" "101" "110" "123" "133" "139" "149" "150" "152" "159" "165" "177"
[13] "178" "186" "191" "214" "219" "221" "234" "239" "240" "246" "248" "250"
[25] "262" "271" "275" "296" "297" "304" "323" "335" "357" "361" "363" "378"
[37] "382" "39"  "390" "414" "418" "43"  "444" "447" "502" "527" "529" "530"
[49] "55"  "555" "557" "582" "61"  "613" "615" "66"  "669" "68"  "71"  "80" 
[61] "83"  "92"  "94" 
> res <- relConH(strategy="tolerant")  # relConH is expected to come from the relConH.R file sourced above; no data is passed, so it presumably works on the workspace objects prepared earlier - verify in relConH.R
Clustering with relational constraint based on a dictionary
by Vladimir Batagelj, March 2018
Method: max   Strategy: tolerant 
[1] "Started: 2018-03-12 04:46:39"
[1] "Finished: 2018-03-12 04:46:39"
> plot(res,hang=-1,main="European countries",sub="dict: tolerant / maximum")  # plot the result with labels hanging down from 0
> ls(hD)  # list the keys that remain in the dictionary after the run
character(0)

Clustering with relational constraints (RC) based on the dist class

> Ri <- sRi; Ro <- sRo; D <- sD  # restore the saved copies before this run
> source("C:\\Users\\batagelj\\work\\R\\RelCon\\relConD.R")  # presumably defines relConD - verify
> res <- relConD(E,strategy="tolerant")  # here the dist object E is passed directly
Clustering with relational constraint based on the class dist
by Vladimir Batagelj, March 2018
Method: max   Strategy: tolerant 
[1] "Started: 2018-03-12 04:54:39"
[1] "Finished: 2018-03-12 04:54:39"
> plot(res,hang=-1,main="European countries",sub="dist: tolerant / maximum")  # plot the result with labels hanging down from 0

Back to Relational constraints