Table of Contents

Summer Olympic medals till 2016

Converting to JSON

> setwd("C:/Users/vlado/DL/data/kaggle/Olympic/2016")
> library(jsonlite)
> source("https://raw.githubusercontent.com/bavla/Rnet/master/R/Pajek.R")
> source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")
> OL <- read.csv("athlete_events.csv")
> dim(OL)
[1] 271116     15
> str(OL)
'data.frame':   271116 obs. of  15 variables:
 $ ID    : int  1 2 3 4 5 5 5 5 5 5 ...
 $ Name  : chr  "A Dijiang" "A Lamusi" "Gunnar Nielsen Aaby" "Edgar Lindenau Aabye" ...
 $ Sex   : chr  "M" "M" "M" "M" ...
 $ Age   : int  24 23 24 34 21 21 25 25 27 27 ...
 $ Height: int  180 170 NA NA 185 185 185 185 185 185 ...
 $ Weight: num  80 60 NA NA 82 82 82 82 82 82 ...
 $ Team  : chr  "China" "China" "Denmark" "Denmark/Sweden" ...
 $ NOC   : chr  "CHN" "CHN" "DEN" "DEN" ...
 $ Games : chr  "1992 Summer" "2012 Summer" "1920 Summer" "1900 Summer" ...
 $ Year  : int  1992 2012 1920 1900 1988 1988 1992 1992 1994 1994 ...
 $ Season: chr  "Summer" "Summer" "Summer" "Summer" ...
 $ City  : chr  "Barcelona" "London" "Antwerpen" "Paris" ...
 $ Sport : chr  "Basketball" "Judo" "Football" "Tug-Of-War" ...
 $ Event : chr  "Basketball Men's Basketball" "Judo Men's Extra-Lightweight" "Football Men's Football" ...
 $ Medal : chr  NA NA NA "Gold" ...
> ng <- length(table(OL$Games)); city <- rep(NA,ng); ig <- factor(OL$Games)
> for(i in 1:nrow(OL)) city[ig[i]] <- OL$City[i]
> cbind(levels(ig),city)
 [1,] "1896 Summer" "Athina"                
 [2,] "1900 Summer" "Paris"                 
 [3,] "1904 Summer" "St. Louis"   
...
[49,] "2012 Summer" "London"                
[50,] "2014 Winter" "Sochi"                 
[51,] "2016 Summer" "Rio de Janeiro" 
> OM <- OL[!is.na(OL$Medal),]
> str(OM)
'data.frame':   39783 obs. of  15 variables:
 $ ID    : int  4 15 15 16 17 17 17 17 17 20 ...
 $ Name  : chr  "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Juhamatti Tapio Aaltonen" ...
 $ Sex   : chr  "M" "M" "M" "M" ...
 $ Age   : int  34 30 30 28 28 28 28 28 32 20 ...
 $ Height: int  NA NA NA 184 175 175 175 175 175 176 ...
 $ Weight: num  NA NA NA 85 64 64 64 64 64 85 ...
 $ Team  : chr  "Denmark/Sweden" "Finland" "Finland" "Finland" ...
 $ NOC   : chr  "DEN" "FIN" "FIN" "FIN" ...
 $ Games : chr  "1900 Summer" "1920 Summer" "1920 Summer" "2014 Winter" ...
 $ Year  : int  1900 1920 1920 2014 1948 1948 1948 1948 1952 1992 ...
 $ Season: chr  "Summer" "Summer" "Summer" "Winter" ...
 $ City  : chr  "Paris" "Antwerpen" "Antwerpen" "Sochi" ...
 $ Sport : chr  "Tug-Of-War" "Swimming" "Swimming" "Ice Hockey" ...
 $ Event : chr  "Tug-Of-War Men's Tug-Of-War" "Swimming Men's 200 metres Breaststroke" "Swimming Men's 400 metres " ...
 $ Medal : chr  "Gold" "Bronze" "Bronze" "Bronze" ...
> write.csv(OM,file="medals2016.csv",fileEncoding="UTF-8")
> OS <- OM[OM$Season=="Summer",]
> str(OS)
'data.frame':   34088 obs. of  15 variables:
 $ ID    : int  4 15 15 17 17 17 17 17 21 25 ...
 $ Name  : chr  "Edgar Lindenau Aabye" "Arvo Ossian Aaltonen" "Arvo Ossian Aaltonen" "Paavo Johannes Aaltonen" ...
 $ Sex   : chr  "M" "M" "M" "M" ...
 $ Age   : int  34 30 30 28 28 28 28 32 27 24 ...
 $ Height: int  NA NA NA 175 175 175 175 175 163 NA ...
 $ Weight: num  NA NA NA 64 64 64 64 64 NA NA ...
 $ Team  : chr  "Denmark/Sweden" "Finland" "Finland" "Finland" ...
 $ NOC   : chr  "DEN" "FIN" "FIN" "FIN" ...
 $ Games : chr  "1900 Summer" "1920 Summer" "1920 Summer" "1948 Summer" ...
 $ Year  : int  1900 1920 1920 1948 1948 1948 1948 1952 2008 1920 ...
 $ Season: chr  "Summer" "Summer" "Summer" "Summer" ...
 $ City  : chr  "Paris" "Antwerpen" "Antwerpen" "London" ...
 $ Sport : chr  "Tug-Of-War" "Swimming" "Swimming" "Gymnastics" ...
 $ Event : chr  "Tug-Of-War Men's Tug-Of-War" "Swimming Men's 200 metres Breaststroke" "Swimming Men's 400 metres" ...
 $ Medal : chr  "Gold" "Bronze" "Bronze" "Bronze" ...
> write.csv(OS,file="Smedals2016.csv",fileEncoding="UTF-8")
> OW <- OM[OM$Season=="Winter",]
> str(OW)
'data.frame':   5695 obs. of  15 variables:
 $ ID    : int  16 20 20 20 20 20 20 20 20 40 ...
 $ Name  : chr  "Juhamatti Tapio Aaltonen" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" "Kjetil Andr Aamodt" ...
 $ Sex   : chr  "M" "M" "M" "M" ...
 $ Age   : int  28 20 20 22 22 22 30 30 34 23 ...
 $ Height: int  184 176 176 176 176 176 176 176 176 NA ...
 $ Weight: num  85 85 85 85 85 85 85 85 85 NA ...
 $ Team  : chr  "Finland" "Norway" "Norway" "Norway" ...
 $ NOC   : chr  "FIN" "NOR" "NOR" "NOR" ...
 $ Games : chr  "2014 Winter" "1992 Winter" "1992 Winter" "1994 Winter" ...
 $ Year  : int  2014 1992 1992 1994 1994 1994 2002 2002 2006 1952 ...
 $ Season: chr  "Winter" "Winter" "Winter" "Winter" ...
 $ City  : chr  "Sochi" "Albertville" "Albertville" "Lillehammer" ...
 $ Sport : chr  "Ice Hockey" "Alpine Skiing" "Alpine Skiing" "Alpine Skiing" ...
 $ Event : chr  "Ice Hockey Men's Ice Hockey" "Alpine Skiing Men's Super G" "Alpine Skiing Men's Giant Slalom" ...
 $ Medal : chr  "Bronze" "Gold" "Bronze" "Silver" ...
> write.csv(OW,file="Wmedals2016.csv",fileEncoding="UTF-8")

> MT <- DF2MWN(OS,c("Name","Games","Team","NOC","Year","Sport","Event","Sex","Medal"),
+   w=c("Age","Height","Weight"),network="Olympic16S0",title="Summer Olympic medals till 2016")
> imed <- c(1,3,2)
> MT$links$Medal <- imed[MT$links$Medal]
> MT$nodes$Medal$ID <- c("Bronze","Silver","Gold")
> str(MT)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 4
  ..$ network: chr "Olympic16S0"
  ..$ title  : chr "Summer Olympic medals till 2016"
  ..$ by     : chr "DF2MWN"
  ..$ date   : chr "Mon Feb  6 01:23:56 2023"
 $ ways  :List of 9
  ..$ Name : chr "Name"
  ..$ Games: chr "Games"
  ..$ Team : chr "Team"
  ..$ NOC  : chr "NOC"
  ..$ Year : chr "Year"
  ..$ Sport: chr "Sport"
  ..$ Event: chr "Event"
  ..$ Sex  : chr "Sex"
  ..$ Medal: chr "Medal"
 $ nodes :List of 9
  ..$ Name :'data.frame':       24545 obs. of  1 variable:
  .. ..$ ID: chr [1:24545] "A. Albert" "A. Dubois" "A. Joshua \"Josh\" West" "A. Lawry" ...
  ..$ Games:'data.frame':       29 obs. of  1 variable:
  .. ..$ ID: chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
  ..$ Team :'data.frame':       480 obs. of  1 variable:
  .. ..$ ID: chr [1:480] "A North American Team" "Afghanistan" "Algeria" "Ali-Baba II" ...
  ..$ NOC  :'data.frame':       147 obs. of  1 variable:
  .. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ...
  ..$ Year :'data.frame':       29 obs. of  1 variable:
  .. ..$ ID: chr [1:29] "1896" "1900" "1904" "1906" ...
  ..$ Sport:'data.frame':       52 obs. of  1 variable:
  .. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
  ..$ Event:'data.frame':       642 obs. of  1 variable:
  .. ..$ ID: chr [1:642] "Aeronautics Mixed Aeronautics" "Alpinism Mixed Alpinism" "Archery Men's Au Chapelet, 33 metres" ...
  ..$ Sex  :'data.frame':       2 obs. of  1 variable:
  .. ..$ ID: chr [1:2] "F" "M"
  ..$ Medal:'data.frame':       3 obs. of  1 variable:
  .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
 $ links :'data.frame': 34088 obs. of  13 variables:
  ..$ one   : num [1:34088] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ Name  : int [1:34088] 5273 2144 2144 17644 17644 17644 17644 17644 18628 777 ...
  ..$ Games : int [1:34088] 2 7 7 12 12 12 12 13 27 7 ...
  ..$ Team  : int [1:34088] 105 141 141 141 141 141 141 141 312 312 ...
  ..$ NOC   : int [1:34088] 32 43 43 43 43 43 43 43 97 97 ...
  ..$ Year  : int [1:34088] 2 7 7 12 12 12 12 13 27 7 ...
  ..$ Sport : int [1:34088] 48 41 41 22 22 22 22 22 23 22 ...
  ..$ Event : int [1:34088] 587 514 527 298 309 297 305 309 321 310 ...
  ..$ Sex   : int [1:34088] 2 2 2 2 2 2 2 2 1 2 ...
  ..$ Medal : num [1:34088] 3 1 1 1 3 3 3 1 3 2 ...
  ..$ Age   : int [1:34088] 34 30 30 28 28 28 28 32 27 24 ...
  ..$ Height: int [1:34088] NA NA NA 175 175 175 175 175 163 NA ...
  ..$ Weight: num [1:34088] NA NA NA 64 64 64 64 64 NA NA ...
 $ data  : list()
> write(toJSON(MT),"Olympics16S0.json")


> MD <- MT
> L <- MD$links[,!(names(MD$links) %in% c("Name","Year","Event","Team"))]
> Links <- aggregate(L[,c("one","Age","Height","Weight")],by=list(L$Games,L$NOC,L$Sport,L$Sex,L$Medal),FUN=sum)
> names(Links) <- c("Games","NOC","Sport","Sex","Medal","w","Age","Height","Weight")
> MD$links <- Links
> ways <- c("Games","NOC","Sport","Sex","Medal"); Ways <- as.list(ways); names(Ways) <- ways
> MD$ways <- Ways; N <- MT$nodes
> MD$nodes <- list(N$Games,N$NOC,N$Sport,N$Sex,N$Medal)
> names(MD$nodes) <- ways
> MD$info$network <- "Olympic16S"
> str(MD)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 4
  ..$ network: chr "Olympic16S"
  ..$ title  : chr "Summer Olympic medals till 2016"
  ..$ by     : chr "DF2MWN"
  ..$ date   : chr "Mon Feb  6 01:23:56 2023"
 $ ways  :List of 5
  ..$ Games: chr "Games"
  ..$ NOC  : chr "NOC"
  ..$ Sport: chr "Sport"
  ..$ Sex  : chr "Sex"
  ..$ Medal: chr "Medal"
 $ nodes :List of 5
  ..$ Games:'data.frame':       29 obs. of  1 variable:
  .. ..$ ID: chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
  ..$ NOC  :'data.frame':       147 obs. of  1 variable:
  .. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ...
  ..$ Sport:'data.frame':       52 obs. of  1 variable:
  .. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
  ..$ Sex  :'data.frame':       2 obs. of  1 variable:
  .. ..$ ID: chr [1:2] "F" "M"
  ..$ Medal:'data.frame':       3 obs. of  1 variable:
  .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
 $ links :'data.frame': 10429 obs. of  9 variables:
  ..$ Games : int [1:10429] 23 20 27 5 26 25 28 21 22 25 ...
  ..$ NOC   : int [1:10429] 41 43 44 47 47 50 71 75 75 75 ...
  ..$ Sport : int [1:10429] 3 3 3 3 3 3 3 3 3 3 ...
  ..$ Sex   : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ Medal : num [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w     : num [1:10429] 4 1 3 1 1 3 3 1 1 1 ...
  ..$ Age   : int [1:10429] 96 27 86 40 32 99 67 22 17 29 ...
  ..$ Height: int [1:10429] 674 171 492 NA 170 509 500 164 170 165 ...
  ..$ Weight: num [1:10429] 254 61 189 NA 73 203 180 54 68 58 ...
 $ data  : list()
> write(toJSON(MD),"Olympics16S.json")

Additional data the Olympics

Additional data about the Olympics are available in Wikipedia. I copied the table into a text file places.txt and edit it.

> G <- read.csv("../places.txt",sep=";",skip=1,head=TRUE,strip.white=TRUE)
> GG <- G[(G$Summer!="")&(G$open!=""),c("City","Year","Summer","Country","Region")]
> OL$nodes$Games$City <- GG$City[1:29]
> OL$nodes$Games$Year <- GG$Year[1:29]
> OL$nodes$Games$Num <- GG$Summer[1:29]
> OL$nodes$Games$Country <- GG$Country[1:29]
> OL$nodes$Games$Region <- GG$Region[1:29]
> str(OL)
List of 6
 $ format: chr "MWnets"
 $ info  :List of 4
  ..$ network: chr "Olympic16S"
  ..$ title  : chr "Summer Olympic medals till 2016"
  ..$ by     : chr "DF2MWN"
  ..$ date   : chr "Mon Feb  6 01:23:56 2023"
 $ ways  :List of 5
  ..$ Games: chr "Games"
  ..$ NOC  : chr "NOC"
  ..$ Sport: chr "Sport"
  ..$ Sex  : chr "Sex"
  ..$ Medal: chr "Medal"
 $ nodes :List of 5
  ..$ Games:'data.frame':       29 obs. of  6 variables:
  .. ..$ ID     : chr [1:29] "1896 Summer" "1900 Summer" "1904 Summer" "1906 Summer" ...
  .. ..$ City   : chr [1:29] "Athens" "Paris" "St. Louis" "Athens" ...
  .. ..$ Year   : int [1:29] 1896 1900 1904 1906 1908 1912 1920 1924 1928 1932 ...
  .. ..$ Num    : chr [1:29] "I" "II" "III" "Intercalated" ...
  .. ..$ Country: chr [1:29] "Greece" "France" "United States" "Greece" ...
  .. ..$ Region : chr [1:29] "Europe" "Europe" "North America" "Europe" ...
  ..$ NOC  :'data.frame':       147 obs. of  1 variable:
  .. ..$ ID: chr [1:147] "AFG" "AHO" "ALG" "ANZ" ...
  ..$ Sport:'data.frame':       52 obs. of  1 variable:
  .. ..$ ID: chr [1:52] "Aeronautics" "Alpinism" "Archery" "Art Competitions" ...
  ..$ Sex  :'data.frame':       2 obs. of  1 variable:
  .. ..$ ID: chr [1:2] "F" "M"
  ..$ Medal:'data.frame':       3 obs. of  1 variable:
  .. ..$ ID: chr [1:3] "Bronze" "Silver" "Gold"
 $ links :'data.frame': 10429 obs. of  9 variables:
  ..$ Games : int [1:10429] 23 20 27 5 26 25 28 21 22 25 ...
  ..$ NOC   : int [1:10429] 41 43 44 47 47 50 71 75 75 75 ...
  ..$ Sport : int [1:10429] 3 3 3 3 3 3 3 3 3 3 ...
  ..$ Sex   : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ Medal : int [1:10429] 1 1 1 1 1 1 1 1 1 1 ...
  ..$ w     : int [1:10429] 4 1 3 1 1 3 3 1 1 1 ...
  ..$ Age   : int [1:10429] 96 27 86 40 32 99 67 22 17 29 ...
  ..$ Height: int [1:10429] 674 171 492 NA 170 509 500 164 170 165 ...
  ..$ Weight: num [1:10429] 254 61 189 NA 73 203 180 54 68 58 ...
 $ data  : list()
> write(toJSON(OL),"Olympics16S.json")

Relational core