====== European airlines 2013 ====== The multiway network **[[https://raw.githubusercontent.com/bavla/ibm3m/master/data/AirEu2013.json|AirEu2013]]** was used in the [[https://www.nature.com/articles/srep01344|paper]] Cardillo A. et al. (2013) Emergence of network features from multiplexity, Scientific Reports 3, 1344. The original data are available at http://complex.unizar.es/~atnmultiplex/ . > airID <- c( + "Lufthansa","Ryanair","Easyjet","British A","Turkish A","Air Berlin", + "Air France","SAS","KLM","Alitalia","Swiss IAL","Iberia","Norwegian AS", + "Austrian A","Flybe","Wizz Air","TAP Portugal","Brussels A","Finnair", + "LOT Polish A","Vueling A","Air Nostrum","Air Lingus","Germanwings", + "Pegasus A","Netjets","Transavia H","Niki","SunExpress","Aegean A", + "Czech A","European AT","Malev HA","Air Baltic","Wideroe","TNT Airways", + "Olympic Air") > airID [1] "Lufthansa" "Ryanair" "Easyjet" "British A" "Turkish A" [6] "Air Berlin" "Air France" "SAS" "KLM" "Alitalia" [11] "Swiss IAL" "Iberia" "Norwegian AS" "Austrian A" "Flybe" [16] "Wizz Air" "TAP Portugal" "Brussels A" "Finnair" "LOT Polish A" [21] "Vueling A" "Air Nostrum" "Air Lingus" "Germanwings" "Pegasus A" [26] "Netjets" "Transavia H" "Niki" "SunExpress" "Aegean A" [31] "Czech A" "European AT" "Malev HA" "Air Baltic" "Wideroe" [36] "TNT Airways" "Olympic Air" > L <- read.table("./AirTrans/airEU2013.lnk",header=TRUE) > head(L) u v r 1 1 2 1 2 1 38 1 3 2 1 1 4 2 7 1 5 2 8 1 6 2 10 1 > N <- read.table("./AirTrans/airports.txt",header=TRUE) > head(N) i code long lati 1 1 LCLK 33.630278 34.87889 2 2 EDDF 8.570555 50.03333 3 3 EDDK 7.142779 50.86583 4 4 EGNX -1.328055 52.83111 5 5 EGTE -3.413888 50.73444 6 6 LTBJ 27.155001 38.28917 > info <- list(network="AirEu2013", + title="Air Transportation Multiplex", + by="Cardillo A. et al.", + ref="Cardillo A. et al. Emergence of network features from multiplexity, Scientific Reports 3, 1344 (2013)", + href="http://complex.unizar.es/~atnmultiplex/", + creator="V. 
Batagelj", + date=date() ) > ways=list(airA="first airport",airB="second airport",line="airline") > nodes=list(airA=data.frame(ID=N$code,lon=N$long,lat=N$lati),airB=data.frame(ID=N$code),line=data.frame(ID=airID)) > links=data.frame(airA=L$u,airB=L$v,line=L$r,w=rep(1,length(L$u))) > MN <- list(format="MWnets",info=info,ways=ways,nodes=nodes,links=links,data=list()) > write(toJSON(MN),"./AirTrans/AirEu2013.json") > str(MN) List of 6 $ format: chr "MWnets" $ info :List of 7 ..$ network: chr "AirEu2013" ..$ title : chr "Air Transportation Multiplex" ..$ by : chr "Cardillo A. et al." ..$ ref : chr "Cardillo A. et al. Emergence of network features from multiplexity, Scientific Reports 3, 1344 (2013)" ..$ href : chr "http://complex.unizar.es/~atnmultiplex/" ..$ creator: chr "V. Batagelj" ..$ date : chr "Sun Nov 27 16:47:26 2022" $ ways :List of 3 ..$ airA: chr "first airport" ..$ airB: chr "second airport" ..$ line: chr "airline" $ nodes :List of 3 ..$ airA:'data.frame': 450 obs. of 3 variables: .. ..$ ID : chr [1:450] "LCLK" "EDDF" "EDDK" "EGNX" ... .. ..$ lon: num [1:450] 33.63 8.57 7.14 -1.33 -3.41 ... .. ..$ lat: num [1:450] 34.9 50 50.9 52.8 50.7 ... ..$ airB:'data.frame': 450 obs. of 1 variable: .. ..$ ID: chr [1:450] "LCLK" "EDDF" "EDDK" "EGNX" ... ..$ line:'data.frame': 37 obs. of 1 variable: .. ..$ ID: chr [1:37] "Lufthansa" "Ryanair" "Easyjet" "British A" ... $ links :'data.frame': 7176 obs. of 4 variables: ..$ airA: int [1:7176] 1 1 2 2 2 2 2 2 2 2 ... ..$ airB: int [1:7176] 2 38 1 7 8 10 14 15 17 18 ... ..$ line: int [1:7176] 1 1 1 1 1 1 1 1 1 1 ... ..$ w : num [1:7176] 1 1 1 1 1 1 1 1 1 1 ... $ data : list() ===== Adding additional data ===== Names of airports https://en.wikipedia.org/wiki/List_of_airports_by_ICAO_code:_L . 
I extracted the HTML code from the page to the file ICAO.xml: > wdir <- "D:/vlado/work2/MWnets" > setwd(wdir) > library(XML) > AL <- xmlParse(file="./data/ICAO.xml") > A <- xpathSApply(doc=AL,path="//li",xmlValue) > Encoding(A) <- "UTF-8" > head(A) [1] "LAFK – Tiranë Heliport – Tirana" [2] "LAGJ – Gjader Air Base – Gjader" [3] "LAKO – Korçë Northwest Airport – Korçë" [4] "LAKU (KFZ) – Kukës International Airport – Kukës" [5] "LAKV – Kuçovë Air Base – Kuçovë" [6] "LASK – Shkodër Airport – Shkodër (Shkodra)" > length(A) [1] 1234 > AP <- matrix("",nrow=n,ncol=3) > for(i in 1:n){ + S <- trimws(unlist(strsplit(A[i]," – "))) + if(length(S)>3) {cat(i,length(S),"\n"); + S <- c(S[1],paste(S[2]," - ",S[3],sep=""),S[length(S)])} else + if(length(S)==2) S <- c(S,S[2]) else + if(length(S)==1) S <- c(S,"","") else + if(length(S)==0) S <- c("","","") + AP[i,] <- S + } > N <- AP[,1] > N[nchar(N)>10] [1] "LFBC Cazaux [BA120] Cazaux, France" [2] "Planned: Lisbon / Alcochete / Peninsula de Setúbal" [3] "LTCM (NOP SIC)" [4] "Gaza Airstrip" [5] "Atarot Airport" [6] "LZMA — Martin Airfield — Martin" [7] "LZMC — Malacky Air Base" [8] "LZTN –Trenčín Airfield" > grep("LZMA",N) [1] 1215 > AP[1215] [1] "LZMA — Martin Airfield — Martin" The entries were formatted irregularly (for example, some use "—" instead of " – " as the separator, so they are not split into fields), and there is another page, B, for the northern airports ... Fortunately, I found a page with a CSV file containing the [[https://github.com/ip2location/ip2location-iata-icao/blob/master/iata-icao.csv|airports data]], so I started from scratch: > A <- read.csv("./data/iata-icao.csv",sep=",",header=TRUE) > dim(A) [1] 8983 7 > names(A) [1] "country_code" "region_name" "iata" "icao" "airport" "latitude" [7] "longitude" > head(A) country_code region_name iata icao airport latitude longitude 1 AE Ash Shariqah SHJ OMSJ Sharjah International Airport 25.32860 55.51720047 2 AE Abu Zaby AZI OMAD Al Bateen Executive Airport 24.42830 54.45809937 ... 
> id <- MN$nodes$airA$ID > head(id) [1] "LCLK" "EDDF" "EDDK" "EGNX" "EGTE" "LTBJ" > icao <- A$icao > head(icao) [1] "OMSJ" "OMAD" "OMFJ" "OMBY" "OMRK" "OMDW" > p <- match(id,icao) > length(p) [1] 450 > p > j <- which(is.na(p)) > id[j] [1] "XXXX" "LERJ" "EGNS" "EGJJ" "EGJB" "EGHQ" "EDFE" "YYYY" "EGMH" "LPEV" "EGLD" Some entries were missing. I Googled them and appended them to the CSV file. I had to convert some coordinates from degrees, minutes and seconds into decimal degrees: > decim <- function(d,m,s) return(d+(m+s/60)/60) > decim(38,32,0.5) [1] 38.53347 > decim(7,53,22.7) [1] 7.889639 > A <- read.csv("./data/iata-icao.csv",sep=",",header=TRUE,skip=4) > dim(A) [1] 8994 7 > id <- MN$nodes$airA$ID > icao <- A$icao > p <- match(id,icao) > length(p) [1] 450 > (j <- which(is.na(p))) integer(0) > B <- A[p,] > dim(B) [1] 450 7 > head(B) country_code region_name iata icao airport latitude longitude 2537 CY Larnaka LCA LCLK Larnaca International Airport 34.87510 33.62490082 2607 DE Hessen FRA EDDF Frankfurt Airport 50.03333 8.570556 2567 DE Nordrhein-Westfalen CGN EDDK Cologne Bonn Airport 50.86590 7.142739773 3205 GB England EMA EGNX East Midlands Airport 52.83110 -1.328060031 3162 GB England EXT EGTE Exeter International Airport 50.73440 -3.413889885 6537 TR Izmir ADB LTBJ Adnan Menderes Airport 38.29240 27.15699959 > head(MN$nodes$airA) ID lon lat 1 LCLK 33.6303 34.8789 2 EDDF 8.5706 50.0333 3 EDDK 7.1428 50.8658 4 EGNX -1.3281 52.8311 5 EGTE -3.4139 50.7344 6 LTBJ 27.1550 38.2892 > MN$nodes$airA$iata <- B$iata > MN$nodes$airA$long <- B$airport > MN$nodes$airA$region <- B$region_name > MN$nodes$airA$country <- B$country_code > head(MN$nodes$airA) ID lon lat iata long region country 1 LCLK 33.6303 34.8789 LCA Larnaca International Airport Larnaka CY 2 EDDF 8.5706 50.0333 FRA Frankfurt Airport Hessen DE 3 EDDK 7.1428 50.8658 CGN Cologne Bonn Airport Nordrhein-Westfalen DE 4 EGNX -1.3281 52.8311 EMA East Midlands Airport England GB 5 EGTE -3.4139 50.7344 EXT Exeter International Airport England GB 6 LTBJ 
27.1550 38.2892 ADB Adnan Menderes Airport Izmir TR > write(toJSON(MN),"./data/AirEu2013Ext.json") > str(MN) List of 6 $ format: chr "MWnets" $ info :List of 7 ..$ network: chr "AirEu2013" ..$ title : chr "Air Transportation Multiplex" ..$ by : chr "Cardillo A. et al." ..$ ref : chr "Cardillo A. et al. Emergence of network features from multiplexity, Scientific Reports 3, 1344 (2013)" ..$ href : chr "http://complex.unizar.es/~atnmultiplex/" ..$ creator: chr "V. Batagelj" ..$ date : chr "Sun Nov 27 16:47:26 2022" $ ways :List of 3 ..$ airA: chr "first airport" ..$ airB: chr "second airport" ..$ line: chr "airline" $ nodes :List of 3 ..$ airA:'data.frame': 450 obs. of 7 variables: .. ..$ ID : chr [1:450] "LCLK" "EDDF" "EDDK" "EGNX" ... .. ..$ lon : num [1:450] 33.63 8.57 7.14 -1.33 -3.41 ... .. ..$ lat : num [1:450] 34.9 50 50.9 52.8 50.7 ... .. ..$ iata : chr [1:450] "LCA" "FRA" "CGN" "EMA" ... .. ..$ long : chr [1:450] "Larnaca International Airport" "Frankfurt Airport" "Cologne Bonn Airport" "East Midlands Airport" ... .. ..$ region : chr [1:450] "Larnaka" "Hessen" "Nordrhein-Westfalen" "England" ... .. ..$ country: chr [1:450] "CY" "DE" "DE" "GB" ... ..$ airB:'data.frame': 450 obs. of 1 variable: .. ..$ ID: chr [1:450] "LCLK" "EDDF" "EDDK" "EGNX" ... ..$ line:'data.frame': 37 obs. of 1 variable: .. ..$ ID: chr [1:37] "Lufthansa" "Ryanair" "Easyjet" "British A" ... $ links :'data.frame': 7176 obs. of 4 variables: ..$ airA: int [1:7176] 1 1 2 2 2 2 2 2 2 2 ... ..$ airB: int [1:7176] 2 38 1 7 8 10 14 15 17 18 ... ..$ line: int [1:7176] 1 1 1 1 1 1 1 1 1 1 ... ..$ w : int [1:7176] 1 1 1 1 1 1 1 1 1 1 ... $ data : list() ===== Additional country data ===== A CSV file with data on all the world's countries is available on GitHub [[https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv|ISO-3166]]. I extracted the data of interest for the countries that occur in our data set. I added an additional entry to the CSV file for the "country" XX. 
> wdir <- "D:/vlado/work2/MWnets" > setwd(wdir) > C <- read.csv("./data/ISO-3166.csv",header=TRUE,skip=1) > head(C) name alpha.2 alpha.3 country.code iso_3166.2 region sub.region 1 Afghanistan AF AFG 4 ISO 3166-2:AF Asia Southern Asia 2 Ã…land Islands AX ALA 248 ISO 3166-2:AX Europe Northern Europe 3 Albania AL ALB 8 ISO 3166-2:AL Europe Southern Europe 4 Algeria DZ DZA 12 ISO 3166-2:DZ Africa Northern Africa 5 American Samoa AS ASM 16 ISO 3166-2:AS Oceania Polynesia 6 Andorra AD AND 20 ISO 3166-2:AD Europe Southern Europe intermediate.region region.code sub.region.code intermediate.region.code 1 142 34 NA 2 150 154 NA 3 150 39 NA 4 2 15 NA 5 9 61 NA 6 150 39 NA > Ccode <- factor(MN$nodes$airA$country) > CE <- levels(Ccode) > (q <- match(CE,C$alpha.2)) > Encoding(C) <- "UTF-8" > Eu <- C[q,c(2,3,4,1,6,7)] > names(Eu) <- c("alpha2","alpha3","Ccode","long","region","subregion") > head(Eu) alpha2 alpha3 Ccode long region subregion 15 AT AUT 40 Austria Europe Western Europe 29 BA BIH 70 Bosnia and Herzegovina Europe Southern Europe 22 BE BEL 56 Belgium Europe Western Europe 35 BG BGR 100 Bulgaria Europe Eastern Europe 216 CH CHE 756 Switzerland Europe Western Europe 59 CY CYP 196 Cyprus Asia Western Asia > MN$data$Eu <- Eu > write(toJSON(MN),"./data/AirEu2013Ext.json") The new structure of our multiway network is > str(MN) List of 6 $ format: chr "MWnets" $ info :List of 7 ..$ network: chr "AirEu2013" ..$ title : chr "Air Transportation Multiplex" ..$ by : chr "Cardillo A. et al." ..$ ref : chr "Cardillo A. et al. Emergence of network features from multiplexity, Scientific Reports 3, 1344 (2013)" ..$ href : chr "http://complex.unizar.es/~atnmultiplex/" ..$ creator: chr "V. Batagelj" ..$ date : chr "Sun Nov 27 16:47:26 2022" $ ways :List of 3 ..$ airA: chr "first airport" ..$ airB: chr "second airport" ..$ line: chr "airline" $ nodes :List of 3 ..$ airA:'data.frame': 450 obs. of 7 variables: .. ..$ ID : chr [1:450] "LCLK" "EDDF" "EDDK" "EGNX" ... .. 
..$ lon : num [1:450] 33.63 8.57 7.14 -1.33 -3.41 ... .. ..$ lat : num [1:450] 34.9 50 50.9 52.8 50.7 ... .. ..$ iata : chr [1:450] "LCA" "FRA" "CGN" "EMA" ... .. ..$ long : chr [1:450] "Larnaca International Airport" "Frankfurt Airport" "Cologne Bonn Airport" "East Midlands Airport" ... .. ..$ region : chr [1:450] "Larnaka" "Hessen" "Nordrhein-Westfalen" "England" ... .. ..$ country: chr [1:450] "CY" "DE" "DE" "GB" ... ..$ airB:'data.frame': 450 obs. of 2 variables: .. ..$ ID : chr [1:450] "LCLK" "EDDF" "EDDK" "EGNX" ... .. ..$ long: chr [1:450] "Larnaca International Airport" "Frankfurt Airport" "Cologne Bonn Airport" "East Midlands Airport" ... ..$ line:'data.frame': 37 obs. of 1 variable: .. ..$ ID: chr [1:37] "Lufthansa" "Ryanair" "Easyjet" "British A" ... $ links :'data.frame': 7176 obs. of 4 variables: ..$ airA: int [1:7176] 1 1 2 2 2 2 2 2 2 2 ... ..$ airB: int [1:7176] 2 38 1 7 8 10 14 15 17 18 ... ..$ line: int [1:7176] 1 1 1 1 1 1 1 1 1 1 ... ..$ w : int [1:7176] 1 1 1 1 1 1 1 1 1 1 ... $ data :List of 1 ..$ Eu:'data.frame': 40 obs. of 6 variables: .. ..$ alpha2 : chr [1:40] "AT" "BA" "BE" "BG" ... .. ..$ alpha3 : chr [1:40] "AUT" "BIH" "BEL" "BGR" ... .. ..$ Ccode : int [1:40] 40 70 56 100 756 196 203 276 208 233 ... .. ..$ long : chr [1:40] "Austria" "Bosnia and Herzegovina" "Belgium" "Bulgaria" ... .. ..$ region : chr [1:40] "Europe" "Europe" "Europe" "Europe" ... .. ..$ subregion: chr [1:40] "Western Europe" "Southern Europe" "Western Europe" "Eastern Europe" ... ===== To do ===== The data contain Iran!? Some country names are very long: "United Kingdom of Great Britain and Northern Ireland", "Iran (Islamic Republic of)", "Svalbard and Jan Mayen", ... Make long airport names shorter: Airport -> Ap, Airfield -> Af, International Airport -> IA, ... A first version (it abbreviates Airport -> A, International A -> IA, National A -> NA, Air Base -> AB): > N <- gsub("Air Base","AB",gsub("National A","NA",gsub("International A","IA", + gsub("Airport","A",MN$nodes$airA$long)))) > MN$nodes$airA$long <- MN$nodes$airB$long <- N