Visualization

Preparing the data

The raw data for France are lists (country name, number of players) for different periods.

> getwd()
[1] "D:/Data/football/Pat"
> a <- read.csv("france.csv",header=TRUE,sep=";",stringsAsFactors=FALSE)
> names(a)
 [1] "C46.60"     "X1946.1960" "C60.70"     "X1960.1970" "C70.80"    
 [6] "X1970.1980" "C80.90"     "X1980.1990" "C90.00"     "X1990.00"  
[11] "C2000.10"   "X2000.10"   "X"          "X.1"        "C90.94"    
[16] "X1990.94"   "C96.00"     "X1996.00"  
> C <- setdiff(union(a$C46.60,c(a$C60.70,a$C70.80,a$C80.90,a$C90.00,a$C2000.10)),"")
> C
 [1] "ALG" "MAR" "ESP" "HUN" "ARG" "ITA" "AUT" "NED" "SWE" "CZE" "DEN" "BRA"
[13] "SUI" "URU" "ENG" "CMR" "LVA" "GER" "TUN" "NOR" "CIV" "PAR" "LUX" "SCO"
[25] "YUG" "WAL" "SEN" "MLI" "BEL" "TGO" "SCG" "POL" "ISL" "COG" "UKR" "RUS"
[37] "ROU" "MKD" "GAB" "FIN" "BIH" "USA" "TUR" "SVN" "SVK" "GIN" "GEO" "CRO"
[49] "CHI" "CAN" "ALB" "POR" "PER" "MDG" "HTI" "ISR" "TCD" "BFA" "IRL" "COD"
[61] "BUL" "ARM" "GHA" "NGA" "NIR" "LBR" "HON" "COL" "AUS" "KOR" "BEN" "RSA"
[73] "PAN" "MWI" "MRT" "MEX" "GRE" "CHN" "JPN" "EGY" "ZWE" "ZAM" "SLE" "NER"
[85] "MUS" "KEN" "GNB" "QAT" "LTU" "JAM" "CPV" "AGO"

We combine the lists into a matrix W (Country × Period) with the number of players as values.

# Build the Country x Period matrix of player counts for France.
#
# Input:  france.csv (semicolon separated); for every period it holds one
#         column of country codes and one column of player counts.
# Output: T - counts, one row per country, one column per period
#         S - row totals over the six main periods
#         q - row order by decreasing total
#         W - T with its rows reordered by q
# NOTE(review): the name T shadows R's shorthand for TRUE; it is kept only
# because the result names of this script are left unchanged.
setwd("D:/Data/football/Pat")
library(lattice)
a <- read.csv("france.csv", header = TRUE, sep = ";", stringsAsFactors = FALSE)

# All country codes occurring in the six main periods; "" entries are dropped.
C <- setdiff(
  union(a$C46.60, c(a$C60.70, a$C70.80, a$C80.90, a$C90.00, a$C2000.10)),
  ""
)

# One row per period: the column with the country codes, the column with the
# player counts, and the label used for the matrix column.
periods <- data.frame(
  country = c("C46.60", "C60.70", "C70.80", "C80.90",
              "C90.00", "C2000.10", "C90.94", "C96.00"),
  players = c("X1946.1960", "X1960.1970", "X1970.1980", "X1980.1990",
              "X1990.00", "X2000.10", "X1990.94", "X1996.00"),
  label = c("46-60", "60-70", "70-80", "80-90",
            "90-00", "00-10", "90-94", "96-00"),
  stringsAsFactors = FALSE
)

# Start from all zeros so that a country missing in a period counts as 0.
T <- matrix(0, nrow = length(C), ncol = nrow(periods),
            dimnames = list(C, periods$label))
for (j in seq_len(nrow(periods))) {
  codes <- a[[periods$country[j]]]
  counts <- a[[periods$players[j]]]
  present <- !is.na(counts)  # rows with a missing count are skipped
  T[codes[present], j] <- counts[present]
}

# Order the countries by their total over the six main periods only
# (columns 1:6); the sub-periods 90-94 and 96-00 are not part of the total.
S <- rowSums(T[, 1:6])
q <- order(-S)
W <- T[q, ]

The countries in W are ordered by the total number of players over the six main periods (the sub-periods 90-94 and 96-00 are not included in the total):

> W
    46-60 60-70 70-80 80-90 90-00 00-10 90-94 96-00
ALG    72    37    21    41    46    72    14    32
ARG    35    43    56    45    38    61    10    28
SEN     4     5     8    24    47   122    14    33
BRA    16     8     8     7    46   117    17    30
SCG     3    29    49    27    39    50    12    26
CMR     9    16    14    28    44    69    22    22
MAR    50    14     8    18    24    52    10    14
CIV     7    11     8    21    35    76    13    22
MLI     4     6     9     2    11    58     0    11
POL     3     3    22    26    23    12     9    13
DEN    19     7     8    21    23     9    12    11
NED    24     2    13    16    22     7    15     7
ESP    38    10     6     2    10    17     3     7
GER     8     8    20    21    13     2     9     4
TUN     7     2     3     1    14    42     3    10
ITA    32     7     7     1    10    10     2     8
GIN     1     2     1     2    22    38     3    17
HUN    35     8     2     8     9     3     5     4
BEL     4     4     2    12    18    25     4    15
CRO     1     5     9    15    20    14    13     7
POR     0     3     4     9    16    31     5    11
SUI    12     4     4     8    13    21     3     9
SWE    23     3     5     2     6    21     4     2
AUT    31    11     7     3     2     1     0     2
URU    11     5    10    12     3    14     2     1
CZE    20     2     1     2     7    20     4     3
BIH     2     2    10    13    12     5     6     5
NGA     0     0     0     2    13    26     5    10
TGO     3     5     3     1    11    15     1     9
COG     3     4     4     3     3    19     0     3
COD     0     0     0     5     9    17     2     7
PAR     6     6     5     4     3     4     2     1
LUX     6     7     7     3     3     1     3     1
GHA     0     0     0     3     8    15     4     4
COL     0     0     0     1     5    20     1     4
ENG     9     0     1     9     6     0     3     3
ROU     2     2     2     0     6    12     0     4
NOR     7     0     0     1     4    11     0     4
GAB     2     2     0     3     2    11     2     0
SVN     1     0     1     5     3     8     0     3
MDG     0     1     0     2    12     3     6     8
RUS     2     0     0     1     7     7     3     4
SVK     1     2     1     1     8     4     6     3
SCO     5     0     0     4     7     0     2     5
BFA     0     0     2     2     0    11     0     0
LBR     0     0     0     1    12     2     7     6
USA     1     1     0     0     4     8     2     2
HTI     0     1     2     0     5     6     1     3
BUL     0     0     0     5     5     4     4     1
CHI     1     0     4     0     2     5     0     2
TCD     0     0     5     4     3     0     2     2
ISL     3     1     0     3     1     2     1     0
AUS     0     0     0     1     7     2     4     3
UKR     2     0     0     1     4     2     2     2
ISR     0     0     6     1     2     0     2     0
IRL     0     0     0     6     3     0     0     3
ARM     0     0     0     4     4     1     2     2
LVA     8     0     0     0     0     0     0     0
TUR     1     0     0     1     1     5     0     1
GRE     0     0     0     0     1     7     0     1
YUG     4     2     1     0     0     0     0     0
ALB     1     0     0     0     3     3     0     3
KOR     0     0     0     0     2     5     0     2
BEN     0     0     0     0     2     5     2     1
JPN     0     0     0     0     0     6     0     0
MKD     2     0     2     0     0     1     0     0
EGY     0     0     0     0     0     5     0     0
WAL     4     0     0     0     0     0     0     0
FIN     2     0     0     0     1     1     0     1
PER     0     1     0     0     0     3     0     0
MRT     0     0     0     0     1     3     0     1
GEO     1     0     0     0     1     1     1     0
NIR     0     0     0     1     2     0     1     1
RSA     0     0     0     0     1     2     0     1
ZWE     0     0     0     0     0     3     0     0
MEX     0     0     0     0     1     1     0     1
ZAM     0     0     0     0     0     2     0     0
SLE     0     0     0     0     0     2     0     0
NER     0     0     0     0     0     2     0     0
MUS     0     0     0     0     0     2     0     0
KEN     0     0     0     0     0     2     0     0
GNB     0     0     0     0     0     2     0     0
CAN     1     0     0     0     0     0     0     0
HON     0     0     0     1     0     0     0     0
PAN     0     0     0     0     1     0     0     1
MWI     0     0     0     0     1     0     0     1
CHN     0     0     0     0     1     0     0     1
QAT     0     0     0     0     0     1     0     0
LTU     0     0     0     0     0     1     0     0
JAM     0     0     0     0     0     1     0     0
CPV     0     0     0     0     0     1     0     0
AGO     0     0     0     0     0     1     0     0 

Display of absolute values

# Grey-scale level plot of the absolute player counts for the six main
# periods (columns 1:6 of W), written to France.pdf.
pdf("France.pdf", height = 11.7, width = 8.3, paper = "a4")
# The rows are taken in reverse order; the row count is read from W instead
# of being hard-coded, so the plot keeps working when the country list changes.
levelplot(t(W[rev(seq_len(nrow(W))), 1:6]),
  scales = list(x = list(rot = 90, cex = 0.9), y = list(cex = 0.6)),
  aspect = 4, cuts = 15,
  # 16 grey levels running from white (1) to black (0)
  par.settings = list(regions = list(col = gray(15:0 / 15))),
  xlab = "years", ylab = "countries", main = "France")
dev.off()

France

Display of normalized columns

# prob: divide a numeric vector by its sum (NA values are ignored in the sum),
# turning counts into proportions.
prob <- function(x) x / sum(x, na.rm = TRUE)

# Normalize every column (period) of W separately, so each column of Z sums to 1.
Z <- apply(W, 2, prob)

# Grey-scale level plot of the column-normalized values, written to FranceN.pdf.
pdf("FranceN.pdf", height = 11.7, width = 8.3, paper = "a4")
# Rows are taken in reverse order; the row count is read from Z instead of
# being hard-coded.
levelplot(t(Z[rev(seq_len(nrow(Z))), ]),
  scales = list(x = list(rot = 90, cex = 0.9), y = list(cex = 0.6)),
  aspect = 4, cuts = 15,
  par.settings = list(regions = list(col = gray(15:0 / 15))),
  xlab = "years", ylab = "countries", main = "France")
dev.off()

France normalized columns

Display in colors with selected breaks

# Colour level plot of W with hand-picked class limits.
#
# mbreaks holds the lower limits of the classes as shown in the colour key;
# rbreaks are the actual cut points, shifted by 0.5 so that integer counts
# never fall exactly on a boundary. The last cut point is set to 123.
mbreaks <- c(0, 1, 3, 10, 50, 122)
rbreaks <- mbreaks - 0.5
rbreaks[length(rbreaks)] <- 123

# Written to its own file: the original reused "France.pdf" and thereby
# overwrote the grey-scale figure of the absolute values.
pdf("FranceC.pdf", height = 11.7, width = 8.3, paper = "a4")
# Rows are taken in reverse order; the row count is read from W instead of
# being hard-coded.
levelplot(t(W[rev(seq_len(nrow(W))), ]), at = rbreaks,
  scales = list(x = list(rot = 90, cex = 0.9), y = list(cex = 0.6)),
  aspect = 4,
  # one colour per class, i.e. one fewer than the number of cut points
  par.settings = list(
    regions = list(col = c("white", "yellow", "cyan", "red", "blue"))
  ),
  # the key uses the positions 1..length(mbreaks), labelled with mbreaks
  colorkey = list(at = seq_along(mbreaks), labels = list(labels = mbreaks)),
  xlab = "years", ylab = "countries", main = "France")
dev.off()

France colors with selected breaks

Flow of players

The regional flows to the top 5 leagues. For France, Italy and Spain there are 6 periods: 46-60, 60-70, 70-80, 80-90, 90-00, 00-10; for Germany there are 5 periods: 60-70, 70-80, 80-90, 90-00, 00-10; for England there are 2 periods: 90-00, 00-10.

The regions are:

  • AFC Asian Football Confederation
  • CSA Confederation of South America
  • NAM North America, Central America and the Caribbean [CONCACAF]
  • CAF Confederation of African Football
  • WEU Western Europe
  • EEU Eastern Europe

The source data contain square roots of numbers of players.

> setwd("D:/Data/football/Pat/flow")
> T <- read.table("flow.txt",header=TRUE,skip=1,sep=";")
> S <- T[,2:7]
> rownames(S) <- T[,1]
> colnames(S) <- c("46-60","60-70","70-80","80-90","90-00","00-10")
> S <- as.matrix(round(S**2))
> library(lattice)
> Q <- as.vector(S)
> summary(Q[Q>0])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.0    11.5    63.0   119.7   139.5  2363.0 
> summary(Q[Q>139.5])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  148.0   165.2   200.5   346.1   298.5  2363.0 
> mbreaks <- c(0,0.5,11.5,63,139.5,200,2363)
> pdf("Flow.pdf",height=11.7,width=8.3,paper="a4")
> levelplot(t(S),scales=list(x=list(rot=90,cex=0.9),y=list(cex=0.6)),aspect=2, cuts=6,
+   par.settings=list(regions=list(col=gray(c(1,0.80,0.65,0.50,0.4,0.3,0)))),at=mbreaks,
+   xlab="years",ylab="countries",main="Flow")
> dev.off()

We get the picture and the original table:

> S
         46-60 60-70 70-80 80-90 90-00 00-10
CAF-FRA    162   105    86   163   321   680
CSA-FRA     69    63    83    69    97   224
AFC-FRA      0     0     0     1    10    14
EEU-FRA     92    55   100   109   152   153
WEU-FRA    227    67    90   123   163   166
NAM-FRA      2     2     2     1    11    16
CAF-ITA      0     0     0     1    42   109
CSA-ITA     89    51    13    75   184   373
AFC-ITA      0     0     0     0     9    15
EEU-ITA     46     7     0    25   120   181
WEU-ITA     89    42    11    73   186   237
NAM-ITA      0     0     0     0     7    13
CAF-ESP      4     4     3     9    48    71
CSA-ESP    122   131   217   178   303   419
AFC-ESP      0     0     0     0     3     8
EEU-ESP     19    10     9    57   215   107
WEU-ESP     24     9    29    51   149   237
NAM-ESP      2     1     2    14    15    18
CAF-GER      0     0     1    10    67    42
CSA-GER      0     4     2    12    47    30
AFC-GER      0     0     4     8    21    22
EEU-GER      0    34    49    77   297   148
WEU-GER      0    38    96   121   186    95
NAM-GER      0     0     0     3    23     6
CAF-ENG      0     0     0     0   104   325
CSA-ENG      0     0     0     0    44   116
AFC-ENG      0     0     0     0    91   178
EEU-ENG      0     0     0     0    82   167
WEU-ENG      0     0     0     0  1699  2363
NAM-ENG      0     0     0     0   148   274

Removing the England data:

> Q <- as.vector(S[24:1,])
> summary(Q[Q>0])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00   10.00   49.00   83.54  120.50  680.00 
> summary(Q[Q>120.5])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  121.0   153.0   184.0   221.3   237.0   680.0 
> mbreaks <- c(0,0.5,10,49,120.5,184,680)
> pdf("Flow3.pdf",height=11.7,width=8.3,paper="a4")
> levelplot(t(S[24:1,]),scales=list(x=list(rot=90,cex=0.9),y=list(cex=0.9)),aspect=2, cuts=6,
+   par.settings=list(regions=list(col=c("white","yellow","cyan","red","blue","black"))),
+   at=mbreaks,xlab="years",ylab="countries",main="Flow")
> dev.off()

Picture for 4 leagues.

Some references

book/temp/private/ana/ch4/tab.txt · Last modified: 2017/04/12 13:39 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki