The data set "All Year Olympic Dataset (with 2020 Tokyo Olympics).csv"
is a reduced extension (some variables are missing) of athletes_events.csv
that runs through the 2020 Tokyo Olympics. I decided to use this file.
# Load the Olympic results table, keep only the medal-winning rows, and save
# the medal subsets (all, Summer only, Winter only) as UTF-8 CSV files.
# Printed console output from the recorded session is kept as comments.

# NOTE: machine-specific working directory; all relative file names below
# are resolved against it.
setwd("C:/Users/vlado/DL/data/kaggle/Olympic/2020")

# Helper scripts loaded straight from GitHub (network access required).
source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R")
source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")

OL <- read.csv("All Year Olympic Dataset (with 2020 Tokyo Olympics).csv")
dim(OL)
# [1] 275643     10
str(OL)
# 'data.frame': 275643 obs. of 10 variables:
#  $ X     : int 0 1 2 3 4 5 6 7 8 9 ...
#  $ Name  : chr "A Dijiang" "A Lamusi" "Gunnar Nielsen Aaby" "Edgar Lindenau Aabye" ...
#  $ Sex   : chr "M" "M" "M" "M" ...
#  $ Age   : num 24 23 24 34 21 21 25 25 27 27 ...
#  $ Team  : chr "China" "China" "Denmark" "Denmark/Sweden" ...
#  $ Games : chr "1992 Summer" "2012 Summer" "1920 Summer" "1900 Summer" ...
#  $ Year  : int 1992 2012 1920 1900 1988 1988 1992 1992 1994 1994 ...
#  $ Season: chr "Summer" "Summer" "Summer" "Summer" ...
#  $ Sport : chr "Basketball" "Judo" "Football" "Tug-Of-War" ...
#  $ Medal : int 0 0 0 3 0 0 0 0 0 0 ...

# Medal-winning rows only (Medal code greater than 0).
OM <- OL[OL$Medal > 0, ]
str(OM)
# 'data.frame': 41404 obs. of 10 variables:
#  $ X     : int 3 37 38 40 41 42 44 48 50 60 ...
#  $ Name  : chr "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Juhamatti Tapio Aaltonen" ...
#  $ Sex   : chr "M" "M" "M" "M" ...
#  $ Age   : num 34 30 30 28 28 28 28 28 32 20 ...
#  $ Team  : chr "Denmark/Sweden" "Finland" "Finland" "Finland" ...
#  $ Games : chr "1900 Summer" "1920 Summer" "1920 Summer" "2014 Winter" ...
#  $ Year  : int 1900 1920 1920 2014 1948 1948 1948 1948 1952 1992 ...
#  $ Season: chr "Summer" "Summer" "Summer" "Winter" ...
#  $ Sport : chr "Tug-Of-War" "Swimming" "Swimming" "Ice Hockey" ...
#  $ Medal : int 3 1 1 1 1 3 3 3 1 3 ...
write.csv(OM, file = "medals2020.csv", fileEncoding = "UTF-8")

# Summer Games medals.
OS <- OM[OM$Season == "Summer", ]
str(OS)
# 'data.frame': 35709 obs. of 10 variables:
#  $ X     : int 3 37 38 41 42 44 48 50 79 86 ...
#  $ Name  : chr "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Paavo Johannes Aaltonen" ...
#  $ Sex   : chr "M" "M" "M" "M" ...
#  $ Age   : num 34 30 30 28 28 28 28 32 27 24 ...
#  $ Team  : chr "Denmark/Sweden" "Finland" "Finland" "Finland" ...
#  $ Games : chr "1900 Summer" "1920 Summer" "1920 Summer" "1948 Summer" ...
#  $ Year  : int 1900 1920 1920 1948 1948 1948 1948 1952 2008 1920 ...
#  $ Season: chr "Summer" "Summer" "Summer" "Summer" ...
#  $ Sport : chr "Tug-Of-War" "Swimming" "Swimming" "Gymnastics" ...
#  $ Medal : int 3 1 1 1 3 3 3 1 3 2 ...
write.csv(OS, file = "Smedals2020.csv", fileEncoding = "UTF-8")

# Winter Games medals.
OW <- OM[OM$Season == "Winter", ]
str(OW)
# 'data.frame': 5695 obs. of 10 variables:
#  $ X     : int 40 60 61 63 64 67 73 76 78 110 ...
#  $ Name  : chr "Juhamatti Tapio Aaltonen" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" ...
#  $ Sex   : chr "M" "M" "M" "M" ...
#  $ Age   : num 28 20 20 22 22 22 30 30 34 23 ...
#  $ Team  : chr "Finland" "Norway" "Norway" "Norway" ...
#  $ Games : chr "2014 Winter" "1992 Winter" "1992 Winter" "1994 Winter" ...
#  $ Year  : int 2014 1992 1992 1994 1994 1994 2002 2002 2006 1952 ...
#  $ Season: chr "Winter" "Winter" "Winter" "Winter" ...
#  $ Sport : chr "Ice Hockey" "Alpine Skiing" "Alpine Skiing" "Alpine Skiing" ...
#  $ Medal : int 1 3 1 2 1 2 3 3 3 1 ...
write.csv(OW, file = "Wmedals2020.csv", fileEncoding = "UTF-8")
# Build the 7-way Summer medals network MT from the Summer medal table OS
# and save it as JSON. Printed console output from the recorded session is
# kept as comments.

# Replace the numeric medal codes by labels: 1 -> Bronze, 2 -> Silver,
# 3 -> Gold.
medal <- c("Bronze", "Silver", "Gold")
OS$Medal <- medal[OS$Medal]

# Spell out the sex labels.
OS$Sex[OS$Sex == "M"] <- "Male"
OS$Sex[OS$Sex == "F"] <- "Female"

# One way per listed column; Age is passed as the link weight.
MT <- DF2MWN(
  OS,
  c("Name", "Games", "Team", "Year", "Sport", "Sex", "Medal"),
  w = "Age",
  network = "Olympic20S0",
  title = "Summer Olympic medals till 2020"
)

# Swap Medal indices 2 and 3 and relabel the Medal nodes to match, so that
# the node order is Bronze, Silver, Gold.
# NOTE(review): presumably DF2MWN numbers the labels alphabetically
# (Bronze, Gold, Silver) - confirm against MWnets.R.
imed <- c(1, 3, 2)
MT$links$Medal <- imed[MT$links$Medal]
MT$nodes$Medal$ID <- medal

str(MT)
# List of 6
#  $ format: chr "MWnets"
#  $ info  :List of 4
#   ..$ network: chr "Olympic20S0"
#   ..$ title  : chr "Summer Olympic medals till 2020"
#   ..$ by     : chr "DF2MWN"
#   ..$ date   : chr "Sat Feb 4 22:02:21 2023"
#  $ ways  :List of 7
#   ..$ Name : chr "Name"
#   ..$ Games: chr "Games"
#   ..$ Team : chr "Team"
#   ..$ Year : chr "Year"
#   ..$ Sport: chr "Sport"
#   ..$ Sex  : chr "Sex"
#   ..$ Medal: chr "Medal"
#  $ nodes :List of 7
#   ..$ Name :'data.frame': 26114 obs. of 1 variable:
#   .. ..$ ID: chr [1:26114] "A. Albert" "A. Dubois" "A. Joshua \"Josh\" West" "A. Lawry" ...
#   ..$ Games:'data.frame': 30 obs. of 1 variable:
#   .. ..$ ID: chr [1:30] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
#   ..$ Team :'data.frame': 489 obs. of 1 variable:
#   .. ..$ ID: chr [1:489] "A North American Team" "Afghanistan" "Algeria" "Ali-Baba II" ...
#   ..$ Year :'data.frame': 30 obs. of 1 variable:
#   .. ..$ ID: chr [1:30] "1896" "1900" "1904" "1906" ...
#   ..$ Sport:'data.frame': 65 obs. of 1 variable:
#   .. ..$ ID: chr [1:65] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
#   ..$ Sex  :'data.frame': 2 obs. of 1 variable:
#   .. ..$ ID: chr [1:2] "Female" "Male"
#   ..$ Medal:'data.frame': 3 obs. of 1 variable:
#   .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
#  $ links :'data.frame': 35709 obs. of 9 variables:
#   ..$ one  : num [1:35709] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ Name : int [1:35709] 5630 2199 2199 18636 18636 18636 18636 18636 19726 807 ...
#   ..$ Games: int [1:35709] 2 7 7 12 12 12 12 13 27 7 ...
#   ..$ Team : int [1:35709] 106 143 143 143 143 143 143 143 316 316 ...
#   ..$ Year : int [1:35709] 2 7 7 12 12 12 12 13 27 7 ...
#   ..$ Sport: int [1:35709] 57 50 50 29 29 29 29 29 30 29 ...
#   ..$ Sex  : int [1:35709] 2 2 2 2 2 2 2 2 1 2 ...
#   ..$ Medal: num [1:35709] 3 1 1 1 3 3 3 1 3 2 ...
#   ..$ Age  : num [1:35709] 34 30 30 28 28 28 28 32 27 24 ...
#  $ data  : list()

write(toJSON(MT), "Olympics20S0.json")
# Derive the 5-way network MD from MT by dropping the Name and Year ways and
# summing the links over the remaining ways, then save it as JSON. Printed
# console output from the recorded session is kept as comments.

MD <- MT
ways <- c("Games", "Team", "Sport", "Sex", "Medal")

# Link table without the Name and Year columns.
L <- MD$links[, setdiff(names(MD$links), c("Name", "Year"))]

# One row per (Games, Team, Sport, Sex, Medal) combination:
#   w   - sum of the `one` column (a record count if `one` is 1 on every
#         link - confirm)
#   Age - sum (not mean) of the ages in that combination
Links <- aggregate(L[, c("one", "Age")], by = L[ways], FUN = sum)
names(Links) <- c(ways, "w", "Age")
MD$links <- Links

# Ways and node lists restricted to the five remaining ways, in `ways` order.
Ways <- setNames(as.list(ways), ways)
MD$ways <- Ways
N <- MT$nodes
MD$nodes <- N[ways]

MD$info$network <- "Olympic20S"

str(MD)
# List of 6
#  $ format: chr "MWnets"
#  $ info  :List of 4
#   ..$ network: chr "Olympic20S"
#   ..$ title  : chr "Summer Olympic medals till 2020"
#   ..$ by     : chr "DF2MWN"
#   ..$ date   : chr "Sat Feb 4 22:02:21 2023"
#  $ ways  :List of 5
#   ..$ Games: chr "Games"
#   ..$ Team : chr "Team"
#   ..$ Sport: chr "Sport"
#   ..$ Sex  : chr "Sex"
#   ..$ Medal: chr "Medal"
#  $ nodes :List of 5
#   ..$ Games:'data.frame': 30 obs. of 1 variable:
#   .. ..$ ID: chr [1:30] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
#   ..$ Team :'data.frame': 489 obs. of 1 variable:
#   .. ..$ ID: chr [1:489] "A North American Team" "Afghanistan" "Algeria" "Ali-Baba II" ...
#   ..$ Sport:'data.frame': 65 obs. of 1 variable:
#   .. ..$ ID: chr [1:65] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
#   ..$ Sex  :'data.frame': 2 obs. of 1 variable:
#   .. ..$ ID: chr [1:2] "Female" "Male"
#   ..$ Medal:'data.frame': 3 obs. of 1 variable:
#   .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
#  $ links :'data.frame': 11392 obs. of 7 variables:
#   ..$ Games: int [1:11392] 26 29 20 27 25 5 26 30 30 28 ...
#   ..$ Team : int [1:11392] 80 80 143 147 163 168 168 168 213 216 ...
#   ..$ Sport: int [1:11392] 3 3 3 3 3 3 3 3 3 3 ...
#   ..$ Sex  : int [1:11392] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ Medal: num [1:11392] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ w    : num [1:11392] 3 3 1 3 3 1 1 1 1 3 ...
#   ..$ Age  : num [1:11392] 63 71 27 86 99 40 32 32 24 67 ...
#  $ data  : list()

write(toJSON(MD), "Olympics20S.json")