====== Visualization ======
===== Preparing the data =====
The {{book:temp:private:ana:foreignplayersfrance.zip|raw data for France}} are lists (country name, number of players) for different periods.
# Console session: load the raw table and collect the country codes.
# france.csv is read by a relative name, so it is looked up in this directory.
> getwd()
[1] "D:/Data/football/Pat"
# ';'-separated file with a header row; strings stay character, not factors.
> a <- read.csv("france.csv",header=TRUE,sep=";",stringsAsFactors=FALSE)
# The columns come in pairs per period, e.g. C46.60 / X1946.1960; the code
# below uses the C* columns as country codes and the X* columns as counts.
# "X" and "X.1" are not used by any code shown on this page.
> names(a)
[1] "C46.60" "X1946.1960" "C60.70" "X1960.1970" "C70.80"
[6] "X1970.1980" "C80.90" "X1980.1990" "C90.00" "X1990.00"
[11] "C2000.10" "X2000.10" "X" "X.1" "C90.94"
[16] "X1990.94" "C96.00" "X1996.00"
# Distinct country codes of the six periods 46-60 ... 00-10, in order of first
# appearance; the empty string (padding of shorter lists) is removed.
> C <- setdiff(union(a$C46.60,c(a$C60.70,a$C70.80,a$C80.90,a$C90.00,a$C2000.10)),"")
> C
[1] "ALG" "MAR" "ESP" "HUN" "ARG" "ITA" "AUT" "NED" "SWE" "CZE" "DEN" "BRA"
[13] "SUI" "URU" "ENG" "CMR" "LVA" "GER" "TUN" "NOR" "CIV" "PAR" "LUX" "SCO"
[25] "YUG" "WAL" "SEN" "MLI" "BEL" "TGO" "SCG" "POL" "ISL" "COG" "UKR" "RUS"
[37] "ROU" "MKD" "GAB" "FIN" "BIH" "USA" "TUR" "SVN" "SVK" "GIN" "GEO" "CRO"
[49] "CHI" "CAN" "ALB" "POR" "PER" "MDG" "HTI" "ISR" "TCD" "BFA" "IRL" "COD"
[61] "BUL" "ARM" "GHA" "NGA" "NIR" "LBR" "HON" "COL" "AUS" "KOR" "BEN" "RSA"
[73] "PAN" "MWI" "MRT" "MEX" "GRE" "CHN" "JPN" "EGY" "ZWE" "ZAM" "SLE" "NER"
[85] "MUS" "KEN" "GNB" "QAT" "LTU" "JAM" "CPV" "AGO"
We combine the lists into a matrix W (countries × periods) whose entries are the
numbers of players.
# Build W: a country x period matrix with the number of players as values,
# rows sorted by decreasing total over the six main periods.
setwd("D:/Data/football/Pat")
library(lattice)
a <- read.csv("france.csv", header = TRUE, sep = ";", stringsAsFactors = FALSE)

# Every period is stored as a pair of columns in france.csv: country codes
# (C...) and the matching player counts (X...). The last two periods
# (90-94 and 96-00) lie inside 90-00 and are kept as extra columns.
periods <- data.frame(
  label = c("46-60", "60-70", "70-80", "80-90", "90-00", "00-10",
            "90-94", "96-00"),
  codes = c("C46.60", "C60.70", "C70.80", "C80.90", "C90.00", "C2000.10",
            "C90.94", "C96.00"),
  counts = c("X1946.1960", "X1960.1970", "X1970.1980", "X1980.1990",
             "X1990.00", "X2000.10", "X1990.94", "X1996.00"),
  stringsAsFactors = FALSE
)
main_periods <- 1:6

# Distinct country codes in order of first appearance; "" pads shorter lists.
# All eight code columns are scanned (main periods first), so a code that
# occurs only in a sub-period still gets a row instead of triggering a
# "subscript out of bounds" error when the matrix is filled.
C <- setdiff(unlist(a[periods$codes], use.names = FALSE), "")

# The matrix is called `counts`, not `T`, so the alias for TRUE is not masked.
counts <- matrix(0, nrow = length(C), ncol = nrow(periods),
                 dimnames = list(C, periods$label))
for (j in seq_len(nrow(periods))) {
  n <- a[[periods$counts[j]]]
  code <- a[[periods$codes[j]]]
  filled <- !is.na(n)  # rows beyond the end of this period's list are NA
  counts[code[filled], j] <- n[filled]
}

# Sort countries by decreasing total over the main periods only; the two
# sub-periods would otherwise be counted twice.
S <- rowSums(counts[, main_periods, drop = FALSE])
W <- counts[order(-S), , drop = FALSE]
The countries in W are ordered by decreasing total number of players over the six main periods (46-60 to 00-10):
> W
46-60 60-70 70-80 80-90 90-00 00-10 90-94 96-00
ALG 72 37 21 41 46 72 14 32
ARG 35 43 56 45 38 61 10 28
SEN 4 5 8 24 47 122 14 33
BRA 16 8 8 7 46 117 17 30
SCG 3 29 49 27 39 50 12 26
CMR 9 16 14 28 44 69 22 22
MAR 50 14 8 18 24 52 10 14
CIV 7 11 8 21 35 76 13 22
MLI 4 6 9 2 11 58 0 11
POL 3 3 22 26 23 12 9 13
DEN 19 7 8 21 23 9 12 11
NED 24 2 13 16 22 7 15 7
ESP 38 10 6 2 10 17 3 7
GER 8 8 20 21 13 2 9 4
TUN 7 2 3 1 14 42 3 10
ITA 32 7 7 1 10 10 2 8
GIN 1 2 1 2 22 38 3 17
HUN 35 8 2 8 9 3 5 4
BEL 4 4 2 12 18 25 4 15
CRO 1 5 9 15 20 14 13 7
POR 0 3 4 9 16 31 5 11
SUI 12 4 4 8 13 21 3 9
SWE 23 3 5 2 6 21 4 2
AUT 31 11 7 3 2 1 0 2
URU 11 5 10 12 3 14 2 1
CZE 20 2 1 2 7 20 4 3
BIH 2 2 10 13 12 5 6 5
NGA 0 0 0 2 13 26 5 10
TGO 3 5 3 1 11 15 1 9
COG 3 4 4 3 3 19 0 3
COD 0 0 0 5 9 17 2 7
PAR 6 6 5 4 3 4 2 1
LUX 6 7 7 3 3 1 3 1
GHA 0 0 0 3 8 15 4 4
COL 0 0 0 1 5 20 1 4
ENG 9 0 1 9 6 0 3 3
ROU 2 2 2 0 6 12 0 4
NOR 7 0 0 1 4 11 0 4
GAB 2 2 0 3 2 11 2 0
SVN 1 0 1 5 3 8 0 3
MDG 0 1 0 2 12 3 6 8
RUS 2 0 0 1 7 7 3 4
SVK 1 2 1 1 8 4 6 3
SCO 5 0 0 4 7 0 2 5
BFA 0 0 2 2 0 11 0 0
LBR 0 0 0 1 12 2 7 6
USA 1 1 0 0 4 8 2 2
HTI 0 1 2 0 5 6 1 3
BUL 0 0 0 5 5 4 4 1
CHI 1 0 4 0 2 5 0 2
TCD 0 0 5 4 3 0 2 2
ISL 3 1 0 3 1 2 1 0
AUS 0 0 0 1 7 2 4 3
UKR 2 0 0 1 4 2 2 2
ISR 0 0 6 1 2 0 2 0
IRL 0 0 0 6 3 0 0 3
ARM 0 0 0 4 4 1 2 2
LVA 8 0 0 0 0 0 0 0
TUR 1 0 0 1 1 5 0 1
GRE 0 0 0 0 1 7 0 1
YUG 4 2 1 0 0 0 0 0
ALB 1 0 0 0 3 3 0 3
KOR 0 0 0 0 2 5 0 2
BEN 0 0 0 0 2 5 2 1
JPN 0 0 0 0 0 6 0 0
MKD 2 0 2 0 0 1 0 0
EGY 0 0 0 0 0 5 0 0
WAL 4 0 0 0 0 0 0 0
FIN 2 0 0 0 1 1 0 1
PER 0 1 0 0 0 3 0 0
MRT 0 0 0 0 1 3 0 1
GEO 1 0 0 0 1 1 1 0
NIR 0 0 0 1 2 0 1 1
RSA 0 0 0 0 1 2 0 1
ZWE 0 0 0 0 0 3 0 0
MEX 0 0 0 0 1 1 0 1
ZAM 0 0 0 0 0 2 0 0
SLE 0 0 0 0 0 2 0 0
NER 0 0 0 0 0 2 0 0
MUS 0 0 0 0 0 2 0 0
KEN 0 0 0 0 0 2 0 0
GNB 0 0 0 0 0 2 0 0
CAN 1 0 0 0 0 0 0 0
HON 0 0 0 1 0 0 0 0
PAN 0 0 0 0 1 0 0 1
MWI 0 0 0 0 1 0 0 1
CHN 0 0 0 0 1 0 0 1
QAT 0 0 0 0 0 1 0 0
LTU 0 0 0 0 0 1 0 0
JAM 0 0 0 0 0 1 0 0
CPV 0 0 0 0 0 1 0 0
AGO 0 0 0 0 0 1 0 0
===== Display of absolute values =====
# Gray-scale level plot of the six main periods (columns 1-6 of W), written to
# France.pdf. The rows are reversed so that the first row of W ends up at the
# top of the y axis. The row count is taken from W instead of being
# hard-coded, and the lattice object is printed explicitly so the page is also
# drawn when the code is run through source().
pdf("France.pdf", height = 11.7, width = 8.3, paper = "a4")
print(levelplot(
  t(W[rev(seq_len(nrow(W))), 1:6]),
  scales = list(x = list(rot = 90, cex = 0.9), y = list(cex = 0.6)),
  aspect = 4, cuts = 15,
  par.settings = list(regions = list(col = gray(15:0 / 15))),
  xlab = "years", ylab = "countries", main = "France"
))
dev.off()
{{:book:temp:private:ana:france.pdf|France}}
===== Display of normalized columns =====
# Rescale a numeric vector to shares of its total. Missing values are left
# out of the total and remain NA in the result.
prob <- function(x) {
  total <- sum(x, na.rm = TRUE)
  x / total
}
# Normalize every column (period) of W to shares that sum to 1, then draw the
# same gray-scale level plot for all columns of the result into FranceN.pdf.
# The row count is taken from Z instead of being hard-coded, and the lattice
# object is printed explicitly so the page is also drawn under source().
Z <- apply(W, 2, prob)
pdf("FranceN.pdf", height = 11.7, width = 8.3, paper = "a4")
print(levelplot(
  t(Z[rev(seq_len(nrow(Z))), ]),
  scales = list(x = list(rot = 90, cex = 0.9), y = list(cex = 0.6)),
  aspect = 4, cuts = 15,
  par.settings = list(regions = list(col = gray(15:0 / 15))),
  xlab = "years", ylab = "countries", main = "France"
))
dev.off()
{{:book:temp:private:ana:francen.pdf|France normalized columns}}
===== Display in colors with selected breaks =====
# Level plot with five hand-picked classes: 0, 1-2, 3-9, 10-49 and 50-122.
# mbreaks holds the class labels shown in the key; rbreaks holds the actual
# breakpoints, shifted by 0.5 so that integer counts never fall on a boundary,
# with the last one placed just above the largest label.
mbreaks <- c(0, 1, 3, 10, 50, 122)
rbreaks <- mbreaks - 0.5
rbreaks[length(rbreaks)] <- 123

# Written to FranceC.pdf: the previous name "France.pdf" overwrote the
# gray-scale figure produced earlier on this page.
pdf("FranceC.pdf", height = 11.7, width = 8.3, paper = "a4")
# The colour key is drawn on an evenly spaced 1..6 scale so that all classes
# get the same height; the labels are pinned to those positions explicitly.
# The lattice object is printed explicitly so it is also drawn under source().
key_pos <- seq_along(mbreaks)
print(levelplot(
  t(W[rev(seq_len(nrow(W))), ]), at = rbreaks,
  scales = list(x = list(rot = 90, cex = 0.9), y = list(cex = 0.6)),
  aspect = 4,
  par.settings = list(
    regions = list(col = c("white", "yellow", "cyan", "red", "blue"))
  ),
  colorkey = list(at = key_pos, labels = list(at = key_pos, labels = mbreaks)),
  xlab = "years", ylab = "countries", main = "France"
))
dev.off()
{{:book:temp:private:ana:francec.pdf|France colors with selected breaks}}
===== Flow of players =====
The regional flows to the top 5 leagues. For France, Italy and Spain there are 6
periods: 46-60; 60-70; 70-80; 80-90; 90-00; 00-10. For Germany there are 5 periods: 60-70; 70-80; 80-90; 90-00; 00-10. For England there are 2 periods: 90-00; 00-10.
The regions are:
* AFC Asian Football Confederation
* CSA Confederation of South America
* NAM North America, Central America and the Caribbean [CONCACAF]
* CAF Confederation African Football
* WEU Western Europe
* EEU Eastern Europe
The {{:book:temp:private:ana:ch4:flow.txt|source data}} contain square roots of numbers of players.
# Console session: read the flow table and draw it as a gray-scale level plot.
> setwd("D:/Data/football/Pat/flow")
# ';'-separated file: the first line is skipped, the next one is the header.
# NOTE(review): the name T masks R's built-in alias for TRUE in this session.
> T <- read.table("flow.txt",header=TRUE,skip=1,sep=";")
# Column 1 holds the flow labels, columns 2-7 the values for the six periods.
> S <- T[,2:7]
> rownames(S) <- T[,1]
> colnames(S) <- c("46-60","60-70","70-80","80-90","90-00","00-10")
# The file stores square roots of the player counts (see the text above), so
# the values are squared and rounded; the result is kept as a matrix.
> S <- as.matrix(round(S**2))
> library(lattice)
# Quartiles of the positive counts, then of the counts above the 3rd quartile;
# both summaries are used to choose the breaks below.
> Q <- as.vector(S)
> summary(Q[Q>0])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 11.5 63.0 119.7 139.5 2363.0
> summary(Q[Q>139.5])
Min. 1st Qu. Median Mean 3rd Qu. Max.
148.0 165.2 200.5 346.1 298.5 2363.0
# Breaks: 0.5 separates zero from positive counts; 11.5, 63 and 139.5 are the
# quartiles of the positive counts; 200 is close to the median (200.5) of the
# counts above the 3rd quartile; 2363 is the maximum.
> mbreaks <- c(0,0.5,11.5,63,139.5,200,2363)
# Level plot of the transposed matrix (periods on x, flows on y), written to
# Flow.pdf, with the breakpoints given explicitly through `at`.
# NOTE(review): seven gray levels are supplied for the six intervals defined
# by mbreaks - confirm that the resulting shade assignment is the intended one.
> pdf("Flow.pdf",height=11.7,width=8.3,paper="a4")
> levelplot(t(S),scales=list(x=list(rot=90,cex=0.9),y=list(cex=0.6)),aspect=2, cuts=6,
+ par.settings=list(regions=list(col=gray(c(1,0.80,0.65,0.50,0.4,0.3,0)))),at=mbreaks,
+ xlab="years",ylab="countries",main="Flow")
> dev.off()
We get the {{:book:temp:private:ana:ch4:flow.pdf|picture}} and the original table:
> S
46-60 60-70 70-80 80-90 90-00 00-10
CAF-FRA 162 105 86 163 321 680
CSA-FRA 69 63 83 69 97 224
AFC-FRA 0 0 0 1 10 14
EEU-FRA 92 55 100 109 152 153
WEU-FRA 227 67 90 123 163 166
NAM-FRA 2 2 2 1 11 16
CAF-ITA 0 0 0 1 42 109
CSA-ITA 89 51 13 75 184 373
AFC-ITA 0 0 0 0 9 15
EEU-ITA 46 7 0 25 120 181
WEU-ITA 89 42 11 73 186 237
NAM-ITA 0 0 0 0 7 13
CAF-ESP 4 4 3 9 48 71
CSA-ESP 122 131 217 178 303 419
AFC-ESP 0 0 0 0 3 8
EEU-ESP 19 10 9 57 215 107
WEU-ESP 24 9 29 51 149 237
NAM-ESP 2 1 2 14 15 18
CAF-GER 0 0 1 10 67 42
CSA-GER 0 4 2 12 47 30
AFC-GER 0 0 4 8 21 22
EEU-GER 0 34 49 77 297 148
WEU-GER 0 38 96 121 186 95
NAM-GER 0 0 0 3 23 6
CAF-ENG 0 0 0 0 104 325
CSA-ENG 0 0 0 0 44 116
AFC-ENG 0 0 0 0 91 178
EEU-ENG 0 0 0 0 82 167
WEU-ENG 0 0 0 0 1699 2363
NAM-ENG 0 0 0 0 148 274
Removing the England data:
# Console session: repeat the analysis without the England flows.
# Rows 1-24 of S are the FRA, ITA, ESP and GER flows (the six ENG rows are
# 25-30); indexing with 24:1 also reverses the row order.
> Q <- as.vector(S[24:1,])
> summary(Q[Q>0])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 10.00 49.00 83.54 120.50 680.00
> summary(Q[Q>120.5])
Min. 1st Qu. Median Mean 3rd Qu. Max.
121.0 153.0 184.0 221.3 237.0 680.0
# Breaks: 0.5 separates zero from positive counts; 10, 49 and 120.5 are the
# quartiles of the positive counts; 184 is the median of the counts above the
# 3rd quartile; 680 is the maximum.
> mbreaks <- c(0,0.5,10,49,120.5,184,680)
# Colour level plot of the four remaining leagues, written to Flow3.pdf: six
# colours for the six intervals defined by mbreaks, from white to black.
> pdf("Flow3.pdf",height=11.7,width=8.3,paper="a4")
> levelplot(t(S[24:1,]),scales=list(x=list(rot=90,cex=0.9),y=list(cex=0.9)),aspect=2, cuts=6,
+ par.settings=list(regions=list(col=c("white","yellow","cyan","red","blue","black"))),
+ at=mbreaks,xlab="years",ylab="countries",main="Flow")
> dev.off()
{{:book:temp:private:ana:ch4:flow3.pdf|Picture}} for 4 leagues.
===== Some references =====
* http://learnr.wordpress.com/2009/07/20/ggplot2-version-of-figures-in-lattice-multivariate-data-visualization-with-r-part-6/
* http://learnr.wordpress.com/2010/01/26/ggplot2-quick-heatmap-plotting/#more-2380
* http://www2.warwick.ac.uk/fac/sci/moac/students/peter_cock/r/matrix_contour/
* http://csg.sph.umich.edu/docs/R/graphics-1.pdf
* http://csg.sph.umich.edu/docs/R/
* http://casoilresource.lawr.ucdavis.edu/drupal/blog/2?page=4