====== Summer Olympic medals till 2016 ======
===== Converting to JSON =====
# Work in the author's local folder that holds athlete_events.csv.
> setwd("C:/Users/vlado/DL/data/kaggle/Olympic/2016")
# jsonlite supplies toJSON(), used further down to export the networks.
> library(jsonlite)
# Helper scripts sourced directly from GitHub; presumably MWnets.R defines
# DF2MWN(), which is called later in this session - TODO confirm.
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R")
> source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")
# Raw table: 271116 rows and 15 columns (see the dim() output).
> OL <- read.csv("athlete_events.csv")
> dim(OL)
[1] 271116 15
> str(OL)
'data.frame': 271116 obs. of 15 variables:
$ ID : int 1 2 3 4 5 5 5 5 5 5 ...
$ Name : chr "A Dijiang" "A Lamusi" "Gunnar Nielsen Aaby" "Edgar Lindenau Aabye" ...
$ Sex : chr "M" "M" "M" "M" ...
$ Age : int 24 23 24 34 21 21 25 25 27 27 ...
$ Height: int 180 170 NA NA 185 185 185 185 185 185 ...
$ Weight: num 80 60 NA NA 82 82 82 82 82 82 ...
$ Team : chr "China" "China" "Denmark" "Denmark/Sweden" ...
$ NOC : chr "CHN" "CHN" "DEN" "DEN" ...
$ Games : chr "1992 Summer" "2012 Summer" "1920 Summer" "1900 Summer" ...
$ Year : int 1992 2012 1920 1900 1988 1988 1992 1992 1994 1994 ...
$ Season: chr "Summer" "Summer" "Summer" "Summer" ...
$ City : chr "Barcelona" "London" "Antwerpen" "Paris" ...
$ Sport : chr "Basketball" "Judo" "Football" "Tug-Of-War" ...
$ Event : chr "Basketball Men's Basketball" "Judo Men's Extra-Lightweight" "Football Men's Football" ...
$ Medal : chr NA NA NA "Gold" ...
# City recorded for every Games, aligned with levels(ig).
# Vectorised replacement for a loop over all rows of OL. As in that loop,
# when a Games occurs with more than one city, the city of its last row is kept.
> ig <- factor(OL$Games); ng <- nlevels(ig); city <- rep(NA_character_,ng)
# Flag the last row of each Games; rows with a missing Games are ignored.
> last <- !is.na(ig) & !duplicated(ig,fromLast=TRUE)
> city[as.integer(ig[last])] <- OL$City[last]
> cbind(levels(ig),city)
[1,] "1896 Summer" "Athina"
[2,] "1900 Summer" "Paris"
[3,] "1904 Summer" "St. Louis"
...
[49,] "2012 Summer" "London"
[50,] "2014 Winter" "Sochi"
[51,] "2016 Summer" "Rio de Janeiro"
# Keep only the rows in which a medal is recorded (Medal is NA otherwise).
> OM <- subset(OL,!is.na(Medal))
> str(OM)
'data.frame': 39783 obs. of 15 variables:
$ ID : int 4 15 15 16 17 17 17 17 17 20 ...
$ Name : chr "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Juhamatti Tapio Aaltonen" ...
$ Sex : chr "M" "M" "M" "M" ...
$ Age : int 34 30 30 28 28 28 28 28 32 20 ...
$ Height: int NA NA NA 184 175 175 175 175 175 176 ...
$ Weight: num NA NA NA 85 64 64 64 64 64 85 ...
$ Team : chr "Denmark/Sweden" "Finland" "Finland" "Finland" ...
$ NOC : chr "DEN" "FIN" "FIN" "FIN" ...
$ Games : chr "1900 Summer" "1920 Summer" "1920 Summer" "2014 Winter" ...
$ Year : int 1900 1920 1920 2014 1948 1948 1948 1948 1952 1992 ...
$ Season: chr "Summer" "Summer" "Summer" "Winter" ...
$ City : chr "Paris" "Antwerpen" "Antwerpen" "Sochi" ...
$ Sport : chr "Tug-Of-War" "Swimming" "Swimming" "Ice Hockey" ...
$ Event : chr "Tug-Of-War Men's Tug-Of-War" "Swimming Men's 200 metres Breaststroke" "Swimming Men's 400 metres " ...
$ Medal : chr "Gold" "Bronze" "Bronze" "Bronze" ...
# Save all medal rows, then keep the rows of the Summer Games.
> write.csv(OM,"medals2016.csv",fileEncoding="UTF-8")
> OS <- OM[OM[["Season"]]=="Summer",]
> str(OS)
'data.frame': 34088 obs. of 15 variables:
$ ID : int 4 15 15 17 17 17 17 17 21 25 ...
$ Name : chr "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Paavo Johannes Aaltonen" ...
$ Sex : chr "M" "M" "M" "M" ...
$ Age : int 34 30 30 28 28 28 28 32 27 24 ...
$ Height: int NA NA NA 175 175 175 175 175 163 NA ...
$ Weight: num NA NA NA 64 64 64 64 64 NA NA ...
$ Team : chr "Denmark/Sweden" "Finland" "Finland" "Finland" ...
$ NOC : chr "DEN" "FIN" "FIN" "FIN" ...
$ Games : chr "1900 Summer" "1920 Summer" "1920 Summer" "1948 Summer" ...
$ Year : int 1900 1920 1920 1948 1948 1948 1948 1952 2008 1920 ...
$ Season: chr "Summer" "Summer" "Summer" "Summer" ...
$ City : chr "Paris" "Antwerpen" "Antwerpen" "London" ...
$ Sport : chr "Tug-Of-War" "Swimming" "Swimming" "Gymnastics" ...
$ Event : chr "Tug-Of-War Men's Tug-Of-War" "Swimming Men's 200 metres Breaststroke" "Swimming Men's 400 metres" ...
$ Medal : chr "Gold" "Bronze" "Bronze" "Bronze" ...
# Save the Summer medal rows, then keep the rows of the Winter Games.
> write.csv(OS,"Smedals2016.csv",fileEncoding="UTF-8")
> OW <- OM[OM[["Season"]]=="Winter",]
> str(OW)
'data.frame': 5695 obs. of 15 variables:
$ ID : int 16 20 20 20 20 20 20 20 20 40 ...
$ Name : chr "Juhamatti Tapio Aaltonen" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" ...
$ Sex : chr "M" "M" "M" "M" ...
$ Age : int 28 20 20 22 22 22 30 30 34 23 ...
$ Height: int 184 176 176 176 176 176 176 176 176 NA ...
$ Weight: num 85 85 85 85 85 85 85 85 85 NA ...
$ Team : chr "Finland" "Norway" "Norway" "Norway" ...
$ NOC : chr "FIN" "NOR" "NOR" "NOR" ...
$ Games : chr "2014 Winter" "1992 Winter" "1992 Winter" "1994 Winter" ...
$ Year : int 2014 1992 1992 1994 1994 1994 2002 2002 2006 1952 ...
$ Season: chr "Winter" "Winter" "Winter" "Winter" ...
$ City : chr "Sochi" "Albertville" "Albertville" "Lillehammer" ...
$ Sport : chr "Ice Hockey" "Alpine Skiing" "Alpine Skiing" "Alpine Skiing" ...
$ Event : chr "Ice Hockey Men's Ice Hockey" "Alpine Skiing Men's Super G" "Alpine Skiing Men's Giant Slalom" ...
$ Medal : chr "Bronze" "Gold" "Bronze" "Silver" ...
# Save the Winter medal rows.
> write.csv(OW,file="Wmedals2016.csv",fileEncoding="UTF-8")
# Build the multiway network MT from the Summer medal rows: nine ways
# (Name ... Medal) and the columns Age, Height, Weight passed as w.
> MT <- DF2MWN(OS,c("Name","Games","Team","NOC","Year","Sport","Event","Sex","Medal"),
+ w=c("Age","Height","Weight"),network="Olympic16S0",title="Summer Olympic medals till 2016")
# Recode the medal indices 1,2,3 -> 1,3,2 and relabel the Medal nodes so that
# Bronze=1, Silver=2, Gold=3. Presumably DF2MWN numbers the medals
# alphabetically (Bronze, Gold, Silver) - TODO confirm.
> imed <- c(1,3,2)
> MT$links$Medal <- imed[MT$links$Medal]
> MT$nodes$Medal$ID <- c("Bronze","Silver","Gold")
> str(MT)
List of 6
$ format: chr "MWnets"
$ info :List of 4
..$ network: chr "Olympic16S0"
..$ title : chr "Summer Olympic medals till 2016"
..$ by : chr "DF2MWN"
..$ date : chr "Mon Feb 6 01:23:56 2023"
$ ways :List of 9
..$ Name : chr "Name"
..$ Games: chr "Games"
..$ Team : chr "Team"
..$ NOC : chr "NOC"
..$ Year : chr "Year"
..$ Sport: chr "Sport"
..$ Event: chr "Event"
..$ Sex : chr "Sex"
..$ Medal: chr "Medal"
$ nodes :List of 9
..$ Name :'data.frame': 24545 obs. of 1 variable:
.. ..$ ID: chr [1:24545] "A. Albert" "A. Dubois" "A. Joshua \"Josh\" West" "A. Lawry" ...
..$ Games:'data.frame': 29 obs. of 1 variable:
.. ..$ ID: chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
..$ Team :'data.frame': 480 obs. of 1 variable:
.. ..$ ID: chr [1:480] "A North American Team" "Afghanistan" "Algeria" "Ali-Baba II" ...
..$ NOC :'data.frame': 147 obs. of 1 variable:
.. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ...
..$ Year :'data.frame': 29 obs. of 1 variable:
.. ..$ ID: chr [1:29] "1896" "1900" "1904" "1906" ...
..$ Sport:'data.frame': 52 obs. of 1 variable:
.. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
..$ Event:'data.frame': 642 obs. of 1 variable:
.. ..$ ID: chr [1:642] "Aeronautics Mixed Aeronautics" "Alpinism Mixed Alpinism" "Archery Men's Au Chapelet, 33 metres" ...
..$ Sex :'data.frame': 2 obs. of 1 variable:
.. ..$ ID: chr [1:2] "F" "M"
..$ Medal:'data.frame': 3 obs. of 1 variable:
.. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
$ links :'data.frame': 34088 obs. of 13 variables:
..$ one : num [1:34088] 1 1 1 1 1 1 1 1 1 1 ...
..$ Name : int [1:34088] 5273 2144 2144 17644 17644 17644 17644 17644 18628 777 ...
..$ Games : int [1:34088] 2 7 7 12 12 12 12 13 27 7 ...
..$ Team : int [1:34088] 105 141 141 141 141 141 141 141 312 312 ...
..$ NOC : int [1:34088] 32 43 43 43 43 43 43 43 97 97 ...
..$ Year : int [1:34088] 2 7 7 12 12 12 12 13 27 7 ...
..$ Sport : int [1:34088] 48 41 41 22 22 22 22 22 23 22 ...
..$ Event : int [1:34088] 587 514 527 298 309 297 305 309 321 310 ...
..$ Sex : int [1:34088] 2 2 2 2 2 2 2 2 1 2 ...
..$ Medal : num [1:34088] 3 1 1 1 3 3 3 1 3 2 ...
..$ Age : int [1:34088] 34 30 30 28 28 28 28 32 27 24 ...
..$ Height: int [1:34088] NA NA NA 175 175 175 175 175 163 NA ...
..$ Weight: num [1:34088] NA NA NA 64 64 64 64 64 NA NA ...
$ data : list()
# Export the full nine-way network, then derive the aggregated network MD.
> write(toJSON(MT),"Olympics16S0.json")
> MD <- MT
# The five ways kept in the aggregated network.
> ways <- c("Games","NOC","Sport","Sex","Medal")
# Drop the ways that are aggregated away.
> L <- MD$links[,setdiff(names(MD$links),c("Name","Year","Event","Team"))]
# Sum the link count and the Age/Height/Weight values within each
# (Games, NOC, Sport, Sex, Medal) combination. sum() propagates NA, so a group
# containing a missing Height or Weight gets NA for that total.
> Links <- aggregate(L[,c("one","Age","Height","Weight")],by=L[,ways],FUN=sum)
# The summed column of ones is the number of aggregated links: call it w.
> names(Links)[names(Links)=="one"] <- "w"
> MD$links <- Links
> Ways <- setNames(as.list(ways),ways)
> MD$ways <- Ways; N <- MT$nodes
# Keep the node tables of the remaining ways, in the order given by ways.
> MD$nodes <- N[ways]
> MD$info$network <- "Olympic16S"
> str(MD)
List of 6
$ format: chr "MWnets"
$ info :List of 4
..$ network: chr "Olympic16S"
..$ title : chr "Summer Olympic medals till 2016"
..$ by : chr "DF2MWN"
..$ date : chr "Mon Feb 6 01:23:56 2023"
$ ways :List of 5
..$ Games: chr "Games"
..$ NOC : chr "NOC"
..$ Sport: chr "Sport"
..$ Sex : chr "Sex"
..$ Medal: chr "Medal"
$ nodes :List of 5
..$ Games:'data.frame': 29 obs. of 1 variable:
.. ..$ ID: chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
..$ NOC :'data.frame': 147 obs. of 1 variable:
.. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ...
..$ Sport:'data.frame': 52 obs. of 1 variable:
.. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
..$ Sex :'data.frame': 2 obs. of 1 variable:
.. ..$ ID: chr [1:2] "F" "M"
..$ Medal:'data.frame': 3 obs. of 1 variable:
.. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
$ links :'data.frame': 10429 obs. of 9 variables:
..$ Games : int [1:10429] 23 20 27 5 26 25 28 21 22 25 ...
..$ NOC : int [1:10429] 41 43 44 47 47 50 71 75 75 75 ...
..$ Sport : int [1:10429] 3 3 3 3 3 3 3 3 3 3 ...
..$ Sex : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
..$ Medal : num [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
..$ w : num [1:10429] 4 1 3 1 1 3 3 1 1 1 ...
..$ Age : int [1:10429] 96 27 86 40 32 99 67 22 17 29 ...
..$ Height: int [1:10429] 674 171 492 NA 170 509 500 164 170 165 ...
..$ Weight: num [1:10429] 254 61 189 NA 73 203 180 54 68 58 ...
$ data : list()
# Export the aggregated five-way network MD as JSON.
> write(toJSON(MD),"Olympics16S.json")
===== Additional data about the Olympics =====
Additional data about the Olympics are available on [[https://en.wikipedia.org/wiki/List_of_Olympic_Games_host_cities|Wikipedia]]. I copied the table into a text file ''places.txt'' and edited it.
# Host-city table from places.txt: ';'-separated, first line skipped,
# surrounding white space stripped.
> G <- read.csv("../places.txt",sep=";",skip=1,header=TRUE,strip.white=TRUE)
# Keep the rows whose Summer and open fields are both non-empty.
# Presumably this drops Winter-only and cancelled or future Games - TODO confirm.
> GG <- G[(G$Summer!="")&(G$open!=""),c("City","Year","Summer","Country","Region")]
# Reload the aggregated network. Until here OL is still the raw data frame read
# from athlete_events.csv, while the str(OL) output below shows the MWnets list.
# NOTE(review): this step is missing from the recorded session; reading the JSON
# back is inferred from the integer Medal and w columns in that output - confirm.
> OL <- fromJSON("Olympics16S.json")
# Attach the host-city attributes to the 29 Games nodes by position.
# Assumes the first 29 rows of GG are in the same order as the Games nodes
# (1896 Summer ... 2016 Summer) - verify against places.txt.
> OL$nodes$Games$City <- GG$City[1:29]
> OL$nodes$Games$Year <- GG$Year[1:29]
> OL$nodes$Games$Num <- GG$Summer[1:29]
> OL$nodes$Games$Country <- GG$Country[1:29]
> OL$nodes$Games$Region <- GG$Region[1:29]
> str(OL)
List of 6
$ format: chr "MWnets"
$ info :List of 4
..$ network: chr "Olympic16S"
..$ title : chr "Summer Olympic medals till 2016"
..$ by : chr "DF2MWN"
..$ date : chr "Mon Feb 6 01:23:56 2023"
$ ways :List of 5
..$ Games: chr "Games"
..$ NOC : chr "NOC"
..$ Sport: chr "Sport"
..$ Sex : chr "Sex"
..$ Medal: chr "Medal"
$ nodes :List of 5
..$ Games:'data.frame': 29 obs. of 6 variables:
.. ..$ ID : chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
.. ..$ City : chr [1:29] "Athens" "Paris" "St. Louis" "Athens" ...
.. ..$ Year : int [1:29] 1896 1900 1904 1906 1908 1912 1920 1924 1928 1932 ...
.. ..$ Num : chr [1:29] "I" "II" "III" "Intercalated" ...
.. ..$ Country: chr [1:29] "Greece" "France" "United States" "Greece" ...
.. ..$ Region : chr [1:29] "Europe" "Europe" "North America" "Europe" ...
..$ NOC :'data.frame': 147 obs. of 1 variable:
.. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ...
..$ Sport:'data.frame': 52 obs. of 1 variable:
.. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
..$ Sex :'data.frame': 2 obs. of 1 variable:
.. ..$ ID: chr [1:2] "F" "M"
..$ Medal:'data.frame': 3 obs. of 1 variable:
.. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
$ links :'data.frame': 10429 obs. of 9 variables:
..$ Games : int [1:10429] 23 20 27 5 26 25 28 21 22 25 ...
..$ NOC : int [1:10429] 41 43 44 47 47 50 71 75 75 75 ...
..$ Sport : int [1:10429] 3 3 3 3 3 3 3 3 3 3 ...
..$ Sex : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
..$ Medal : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
..$ w : int [1:10429] 4 1 3 1 1 3 3 1 1 1 ...
..$ Age : int [1:10429] 96 27 86 40 32 99 67 22 17 29 ...
..$ Height: int [1:10429] 674 171 492 NA 170 509 500 164 170 165 ...
..$ Weight: num [1:10429] 254 61 189 NA 73 203 180 54 68 58 ...
$ data : list()
# Overwrites the Olympics16S.json written earlier in this session with the
# version whose Games nodes carry the City, Year, Num, Country and Region attributes.
> write(toJSON(OL),"Olympics16S.json")
[[.:x3d:ol16srcore|Relational core]]