> setwd("C:/Users/vlado/DL/data/kaggle/Olympic/2016") > library(jsonlite) > source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R") > source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R") > OL <- read.csv("athlete_events.csv") > dim(OL) [1] 271116 15 > str(OL) 'data.frame': 271116 obs. of 15 variables: $ ID : int 1 2 3 4 5 5 5 5 5 5 ... $ Name : chr "A Dijiang" "A Lamusi" "Gunnar Nielsen Aaby" "Edgar Lindenau Aabye" ... $ Sex : chr "M" "M" "M" "M" ... $ Age : int 24 23 24 34 21 21 25 25 27 27 ... $ Height: int 180 170 NA NA 185 185 185 185 185 185 ... $ Weight: num 80 60 NA NA 82 82 82 82 82 82 ... $ Team : chr "China" "China" "Denmark" "Denmark/Sweden" ... $ NOC : chr "CHN" "CHN" "DEN" "DEN" ... $ Games : chr "1992 Summer" "2012 Summer" "1920 Summer" "1900 Summer" ... $ Year : int 1992 2012 1920 1900 1988 1988 1992 1992 1994 1994 ... $ Season: chr "Summer" "Summer" "Summer" "Summer" ... $ City : chr "Barcelona" "London" "Antwerpen" "Paris" ... $ Sport : chr "Basketball" "Judo" "Football" "Tug-Of-War" ... $ Event : chr "Basketball Men's Basketball" "Judo Men's Extra-Lightweight" "Football Men's Football" ... $ Medal : chr NA NA NA "Gold" ... > ng <- length(table(OL$Games)); city <- rep(NA,ng); ig <- factor(OL$Games) > for(i in 1:nrow(OL)) city[ig[i]] <- OL$City[i] > cbind(levels(ig),city) [1,] "1896 Summer" "Athina" [2,] "1900 Summer" "Paris" [3,] "1904 Summer" "St. Louis" ... [49,] "2012 Summer" "London" [50,] "2014 Winter" "Sochi" [51,] "2016 Summer" "Rio de Janeiro" > OM <- OL[!is.na(OL$Medal),] > str(OM) 'data.frame': 39783 obs. of 15 variables: $ ID : int 4 15 15 16 17 17 17 17 17 20 ... $ Name : chr "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Juhamatti Tapio Aaltonen" ... $ Sex : chr "M" "M" "M" "M" ... $ Age : int 34 30 30 28 28 28 28 28 32 20 ... $ Height: int NA NA NA 184 175 175 175 175 175 176 ... $ Weight: num NA NA NA 85 64 64 64 64 64 85 ... $ Team : chr "Denmark/Sweden" "Finland" "Finland" "Finland" ... $ NOC : chr "DEN" "FIN" "FIN" "FIN" ... $ Games : chr "1900 Summer" "1920 Summer" "1920 Summer" "2014 Winter" ... $ Year : int 1900 1920 1920 2014 1948 1948 1948 1948 1952 1992 ... $ Season: chr "Summer" "Summer" "Summer" "Winter" ... $ City : chr "Paris" "Antwerpen" "Antwerpen" "Sochi" ... $ Sport : chr "Tug-Of-War" "Swimming" "Swimming" "Ice Hockey" ... $ Event : chr "Tug-Of-War Men's Tug-Of-War" "Swimming Men's 200 metres Breaststroke" "Swimming Men's 400 metres " ... $ Medal : chr "Gold" "Bronze" "Bronze" "Bronze" ... > write.csv(OM,file="medals2016.csv",fileEncoding="UTF-8") > OS <- OM[OM$Season=="Summer",] > str(OS) 'data.frame': 34088 obs. of 15 variables: $ ID : int 4 15 15 17 17 17 17 17 21 25 ... $ Name : chr "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Paavo Johannes Aaltonen" ... $ Sex : chr "M" "M" "M" "M" ... $ Age : int 34 30 30 28 28 28 28 32 27 24 ... $ Height: int NA NA NA 175 175 175 175 175 163 NA ... $ Weight: num NA NA NA 64 64 64 64 64 NA NA ... $ Team : chr "Denmark/Sweden" "Finland" "Finland" "Finland" ... $ NOC : chr "DEN" "FIN" "FIN" "FIN" ... $ Games : chr "1900 Summer" "1920 Summer" "1920 Summer" "1948 Summer" ... $ Year : int 1900 1920 1920 1948 1948 1948 1948 1952 2008 1920 ... $ Season: chr "Summer" "Summer" "Summer" "Summer" ... $ City : chr "Paris" "Antwerpen" "Antwerpen" "London" ... $ Sport : chr "Tug-Of-War" "Swimming" "Swimming" "Gymnastics" ... $ Event : chr "Tug-Of-War Men's Tug-Of-War" "Swimming Men's 200 metres Breaststroke" "Swimming Men's 400 metres" ... $ Medal : chr "Gold" "Bronze" "Bronze" "Bronze" ... > write.csv(OS,file="Smedals2016.csv",fileEncoding="UTF-8") > OW <- OM[OM$Season=="Winter",] > str(OW) 'data.frame': 5695 obs. of 15 variables: $ ID : int 16 20 20 20 20 20 20 20 20 40 ... $ Name : chr "Juhamatti Tapio Aaltonen" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" ... $ Sex : chr "M" "M" "M" "M" ... $ Age : int 28 20 20 22 22 22 30 30 34 23 ... $ Height: int 184 176 176 176 176 176 176 176 176 NA ... $ Weight: num 85 85 85 85 85 85 85 85 85 NA ... $ Team : chr "Finland" "Norway" "Norway" "Norway" ... $ NOC : chr "FIN" "NOR" "NOR" "NOR" ... $ Games : chr "2014 Winter" "1992 Winter" "1992 Winter" "1994 Winter" ... $ Year : int 2014 1992 1992 1994 1994 1994 2002 2002 2006 1952 ... $ Season: chr "Winter" "Winter" "Winter" "Winter" ... $ City : chr "Sochi" "Albertville" "Albertville" "Lillehammer" ... $ Sport : chr "Ice Hockey" "Alpine Skiing" "Alpine Skiing" "Alpine Skiing" ... $ Event : chr "Ice Hockey Men's Ice Hockey" "Alpine Skiing Men's Super G" "Alpine Skiing Men's Giant Slalom" ... $ Medal : chr "Bronze" "Gold" "Bronze" "Silver" ... > write.csv(OW,file="Wmedals2016.csv",fileEncoding="UTF-8") > MT <- DF2MWN(OS,c("Name","Games","Team","NOC","Year","Sport","Event","Sex","Medal"), + w=c("Age","Height","Weight"),network="Olympic16S0",title="Summer Olympic medals till 2016") > imed <- c(1,3,2) > MT$links$Medal <- imed[MT$links$Medal] > MT$nodes$Medal$ID <- c("Bronze","Silver","Gold") > str(MT) List of 6 $ format: chr "MWnets" $ info :List of 4 ..$ network: chr "Olympic16S0" ..$ title : chr "Summer Olympic medals till 2016" ..$ by : chr "DF2MWN" ..$ date : chr "Mon Feb 6 01:23:56 2023" $ ways :List of 9 ..$ Name : chr "Name" ..$ Games: chr "Games" ..$ Team : chr "Team" ..$ NOC : chr "NOC" ..$ Year : chr "Year" ..$ Sport: chr "Sport" ..$ Event: chr "Event" ..$ Sex : chr "Sex" ..$ Medal: chr "Medal" $ nodes :List of 9 ..$ Name :'data.frame': 24545 obs. of 1 variable: .. ..$ ID: chr [1:24545] "A. Albert" "A. Dubois" "A. Joshua \"Josh\" West" "A. Lawry" ... ..$ Games:'data.frame': 29 obs. of 1 variable: .. ..$ ID: chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ... ..$ Team :'data.frame': 480 obs. of 1 variable: .. ..$ ID: chr [1:480] "A North American Team" "Afghanistan" "Algeria" "Ali-Baba II" ... ..$ NOC :'data.frame': 147 obs. of 1 variable: .. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ... ..$ Year :'data.frame': 29 obs. of 1 variable: .. ..$ ID: chr [1:29] "1896" "1900" "1904" "1906" ... ..$ Sport:'data.frame': 52 obs. of 1 variable: .. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ... ..$ Event:'data.frame': 642 obs. of 1 variable: .. ..$ ID: chr [1:642] "Aeronautics Mixed Aeronautics" "Alpinism Mixed Alpinism" "Archery Men's Au Chapelet, 33 metres" ... ..$ Sex :'data.frame': 2 obs. of 1 variable: .. ..$ ID: chr [1:2] "F" "M" ..$ Medal:'data.frame': 3 obs. of 1 variable: .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold" $ links :'data.frame': 34088 obs. of 13 variables: ..$ one : num [1:34088] 1 1 1 1 1 1 1 1 1 1 ... ..$ Name : int [1:34088] 5273 2144 2144 17644 17644 17644 17644 17644 18628 777 ... ..$ Games : int [1:34088] 2 7 7 12 12 12 12 13 27 7 ... ..$ Team : int [1:34088] 105 141 141 141 141 141 141 141 312 312 ... ..$ NOC : int [1:34088] 32 43 43 43 43 43 43 43 97 97 ... ..$ Year : int [1:34088] 2 7 7 12 12 12 12 13 27 7 ... ..$ Sport : int [1:34088] 48 41 41 22 22 22 22 22 23 22 ... ..$ Event : int [1:34088] 587 514 527 298 309 297 305 309 321 310 ... ..$ Sex : int [1:34088] 2 2 2 2 2 2 2 2 1 2 ... ..$ Medal : num [1:34088] 3 1 1 1 3 3 3 1 3 2 ... ..$ Age : int [1:34088] 34 30 30 28 28 28 28 32 27 24 ... ..$ Height: int [1:34088] NA NA NA 175 175 175 175 175 163 NA ... ..$ Weight: num [1:34088] NA NA NA 64 64 64 64 64 NA NA ... $ data : list() > write(toJSON(MT),"Olympics16S0.json") > MD <- MT > L <- MD$links[,!(names(MD$links) %in% c("Name","Year","Event","Team"))] > Links <- aggregate(L[,c("one","Age","Height","Weight")],by=list(L$Games,L$NOC,L$Sport,L$Sex,L$Medal),FUN=sum) > names(Links) <- c("Games","NOC","Sport","Sex","Medal","w","Age","Height","Weight") > MD$links <- Links > ways <- c("Games","NOC","Sport","Sex","Medal"); Ways <- as.list(ways); names(Ways) <- ways > MD$ways <- Ways; N <- MT$nodes > MD$nodes <- list(N$Games,N$NOC,N$Sport,N$Sex,N$Medal) > names(MD$nodes) <- ways > MD$info$network <- "Olympic16S" > str(MD) List of 6 $ format: chr "MWnets" $ info :List of 4 ..$ network: chr "Olympic16S" ..$ title : chr "Summer Olympic medals till 2016" ..$ by : chr "DF2MWN" ..$ date : chr "Mon Feb 6 01:23:56 2023" $ ways :List of 5 ..$ Games: chr "Games" ..$ NOC : chr "NOC" ..$ Sport: chr "Sport" ..$ Sex : chr "Sex" ..$ Medal: chr "Medal" $ nodes :List of 5 ..$ Games:'data.frame': 29 obs. of 1 variable: .. ..$ ID: chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ... ..$ NOC :'data.frame': 147 obs. of 1 variable: .. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ... ..$ Sport:'data.frame': 52 obs. of 1 variable: .. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ... ..$ Sex :'data.frame': 2 obs. of 1 variable: .. ..$ ID: chr [1:2] "F" "M" ..$ Medal:'data.frame': 3 obs. of 1 variable: .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold" $ links :'data.frame': 10429 obs. of 9 variables: ..$ Games : int [1:10429] 23 20 27 5 26 25 28 21 22 25 ... ..$ NOC : int [1:10429] 41 43 44 47 47 50 71 75 75 75 ... ..$ Sport : int [1:10429] 3 3 3 3 3 3 3 3 3 3 ... ..$ Sex : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ... ..$ Medal : num [1:10429] 1 1 1 1 1 1 1 1 1 1 ... ..$ w : num [1:10429] 4 1 3 1 1 3 3 1 1 1 ... ..$ Age : int [1:10429] 96 27 86 40 32 99 67 22 17 29 ... ..$ Height: int [1:10429] 674 171 492 NA 170 509 500 164 170 165 ... ..$ Weight: num [1:10429] 254 61 189 NA 73 203 180 54 68 58 ... $ data : list() > write(toJSON(MD),"Olympics16S.json")
Additional data about the Olympics are available in Wikipedia. I copied the table into a text file places.txt
and edit it.
> G <- read.csv("../places.txt",sep=";",skip=1,head=TRUE,strip.white=TRUE) > GG <- G[(G$Summer!="")&(G$open!=""),c("City","Year","Summer","Country","Region")] > OL$nodes$Games$City <- GG$City[1:29] > OL$nodes$Games$Year <- GG$Year[1:29] > OL$nodes$Games$Num <- GG$Summer[1:29] > OL$nodes$Games$Country <- GG$Country[1:29] > OL$nodes$Games$Region <- GG$Region[1:29] > str(OL) List of 6 $ format: chr "MWnets" $ info :List of 4 ..$ network: chr "Olympic16S" ..$ title : chr "Summer Olympic medals till 2016" ..$ by : chr "DF2MWN" ..$ date : chr "Mon Feb 6 01:23:56 2023" $ ways :List of 5 ..$ Games: chr "Games" ..$ NOC : chr "NOC" ..$ Sport: chr "Sport" ..$ Sex : chr "Sex" ..$ Medal: chr "Medal" $ nodes :List of 5 ..$ Games:'data.frame': 29 obs. of 6 variables: .. ..$ ID : chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ... .. ..$ City : chr [1:29] "Athens" "Paris" "St. Louis" "Athens" ... .. ..$ Year : int [1:29] 1896 1900 1904 1906 1908 1912 1920 1924 1928 1932 ... .. ..$ Num : chr [1:29] "I" "II" "III" "Intercalated" ... .. ..$ Country: chr [1:29] "Greece" "France" "United States" "Greece" ... .. ..$ Region : chr [1:29] "Europe" "Europe" "North America" "Europe" ... ..$ NOC :'data.frame': 147 obs. of 1 variable: .. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ... ..$ Sport:'data.frame': 52 obs. of 1 variable: .. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ... ..$ Sex :'data.frame': 2 obs. of 1 variable: .. ..$ ID: chr [1:2] "F" "M" ..$ Medal:'data.frame': 3 obs. of 1 variable: .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold" $ links :'data.frame': 10429 obs. of 9 variables: ..$ Games : int [1:10429] 23 20 27 5 26 25 28 21 22 25 ... ..$ NOC : int [1:10429] 41 43 44 47 47 50 71 75 75 75 ... ..$ Sport : int [1:10429] 3 3 3 3 3 3 3 3 3 3 ... ..$ Sex : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ... ..$ Medal : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ... ..$ w : int [1:10429] 4 1 3 1 1 3 3 1 1 1 ... ..$ Age : int [1:10429] 96 27 86 40 32 99 67 22 17 29 ... ..$ Height: int [1:10429] 674 171 492 NA 170 509 500 164 170 165 ... ..$ Weight: num [1:10429] 254 61 189 NA 73 203 180 54 68 58 ... $ data : list() > write(toJSON(OL),"Olympics16S.json")