====== Austin R. Benson hypernet datasets in R ====== https://www.cs.cornell.edu/~arb/data/ ===== How to read the data ===== Let's look at the ''contact-high-school'' dataset. In the file ''DATA-DESCRIPTION.txt'' we find the description of the hypernets format used in ''arb''. > wdir <- "C:/Users/vlado/DL/data/hyper" > setwd(wdir) > N <- read.csv("./school/contact-high-school/ex-nverts.txt",header=FALSE) > N V1 1 3 2 2 3 4 > E <- read.csv("./school/contact-high-school/ex-simplices.txt",header=FALSE) > E V1 1 1 2 2 3 3 4 2 5 4 6 1 7 3 8 4 9 5 > T <- read.csv("./school/contact-high-school/ex-times.txt",header=FALSE) > T V1 1 10 2 15 3 21 > H <- data.frame(E=rep(NA,3),T=T$V1) > H E T 1 NA 10 2 NA 15 3 NA 21 > L = vector(mode="list",3) > L[[1]] <- E$V1[1:3] > L[[2]] <- E$V1[4:5] > L[[3]] <- E$V1[6:9] > H$E <- L > H E T 1 1, 2, 3 10 2 2, 4 15 3 1, 3, 4, 5 21 > H$E[[1]] [1] 1 2 3 > H$E[[2]] [1] 2 4 > m <- nrow(N); lab <- paste("e",1:m,sep="") > f <- cumsum(N$V1); s <- c(1,1+f[1:(m-1)]) > H <- data.frame(ID=lab,T=T$V1,E=rep(NA,m)) > L = vector(mode="list",m) > for(i in 1:m) L[[i]] <- E$V1[s[i]:f[i]] > H$E <- L > H ID T E 1 e1 10 1, 2, 3 2 e2 15 2, 4 3 e3 21 1, 3, 4, 5 ===== JSON format ===== > N <- read.csv("./school/contact-high-school/ex-nverts.txt",header=FALSE) > E <- read.csv("./school/contact-high-school/ex-simplices.txt",header=FALSE) > T <- read.csv("./school/contact-high-school/ex-times.txt",header=FALSE) > m <- nrow(N); lab <- paste("e",1:m,sep="") > f <- cumsum(N$V1); s <- c(1,1+f[1:(m-1)]) > H <- data.frame(ID=lab,T=T$V1,E=rep(NA,m)) > L = vector(mode="list",m) > for(i in 1:m) L[[i]] <- E$V1[s[i]:f[i]] > H$E <- L > n <- max(E$V1); names <- paste("v",1:n,sep="") > V <- data.frame(ID=names) > info <- list(network="ex", + title="ARB: simple example from DATA-DESCRIPTION.txt", + by="Austin R. Benson", + href="https://www.cs.cornell.edu/~arb/data/", + creator="V. 
Batagelj", + date=date(), + nNodes=n, + nLinks=m, + simple=TRUE ) > HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list()) > str(HN) List of 5 $ format: chr "hypernets" $ info :List of 9 ..$ network: chr "ex" ..$ title : chr "ARB: simple example from DATA-DESCRIPTION.txt" ..$ by : chr "Austin R. Benson" ..$ href : chr "https://www.cs.cornell.edu/~arb/data/" ..$ creator: chr "V. Batagelj" ..$ date : chr "Wed Oct 18 01:32:14 2023" ..$ nNodes : int 5 ..$ nLinks : int 3 ..$ simple : logi TRUE $ nodes :'data.frame': 5 obs. of 1 variable: ..$ ID: chr [1:5] "v1" "v2" "v3" "v4" ... $ links :'data.frame': 3 obs. of 3 variables: ..$ ID: chr [1:3] "e1" "e2" "e3" ..$ T : int [1:3] 10 15 21 ..$ E :List of 3 .. ..$ : int [1:3] 1 2 3 .. ..$ : int [1:2] 2 4 .. ..$ : int [1:4] 1 3 4 5 $ data : list() > write(toJSON(HN),"ex.json") ===== ''contact-high-school'' dataset ===== > library(jsonlite) > N <- read.csv("./school/contact-high-school/contact-high-school-nverts.txt",header=FALSE) > E <- read.csv("./school/contact-high-school/contact-high-school-simplices.txt",header=FALSE) > T <- read.csv("./school/contact-high-school/contact-high-school-times.txt",header=FALSE) > m <- nrow(N); lab <- paste("e",1:m,sep="") > f <- cumsum(N$V1); s <- c(1,1+f[1:(m-1)]) > H <- data.frame(ID=lab,T=T$V1,E=rep(NA,m)) > L = vector(mode="list",m) > for(i in 1:m) L[[i]] <- E$V1[s[i]:f[i]] > H$E <- L > head(H) ID T E 1 e1 1385982020 2, 1 2 e2 1385982020 9, 11 3 e3 1385982020 40, 39 4 e4 1385982020 46, 45 5 e5 1385982020 26, 25 6 e6 1385982020 55, 56 > tail(H) ID T E 172030 e172030 1386345560 133, 281 172031 e172031 1386345560 258, 205 172032 e172032 1386345580 4, 108 172033 e172033 1386345580 133, 281 172034 e172034 1386345580 161, 84 172035 e172035 1386345580 258, 205 > size <- sapply(H$E,length) > table(size) size 2 3 4 5 163977 7475 576 7 > H[which(size==5),] ID T E 9266 e9266 1385989560 166, 145, 268, 276, 269 21312 e21312 1385997200 28, 76, 41, 91, 85 21478 e21478 1385997280 163, 77, 58, 
127, 245 21945 e21945 1385997480 92, 29, 28, 76, 85 51855 e51855 1386071700 163, 83, 186, 82, 127 74500 e74500 1386144500 166, 285, 145, 144, 276 116649 e116649 1386234740 288, 258, 205, 265, 303 > F <- table(E) > head(sort(F,decreasing=TRUE),15) V1 78 16 95 61 49 77 270 139 301 17 71 126 217 6 60 4495 4012 3983 3885 3851 3697 3386 3332 3330 3295 3205 3123 3042 2948 2673 > n <- max(E$V1); names <- paste("v",1:n,sep="") > V <- data.frame(ID=names) > info <- list(network="contact-high-school", + title="ARB: contact-high-school dataset", + by="Rossana Mastrandrea, Julie Fournet, and Alain Barrat", + ref="Mastrandrea, etal: Contact Patterns in a High School. PLOS ONE, 2015", + href="https://www.cs.cornell.edu/~arb/data/contact-high-school/", + creator="V. Batagelj", + date=date(), + nNodes=n, + nLinks=m, + simple=NA ) > HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list()) > str(HN) List of 5 $ format: chr "hypernets" $ info :List of 10 ..$ network: chr "contact-high-school" ..$ title : chr "ARB: contact-high-school dataset" ..$ by : chr "Rossana Mastrandrea, Julie Fournet, and Alain Barrat" ..$ ref : chr "Mastrandrea, etal: Contact Patterns in a High School. PLOS ONE, 2015" ..$ href : chr "https://www.cs.cornell.edu/~arb/data/contact-high-school/" ..$ creator: chr "V. Batagelj" ..$ date : chr "Tue Oct 17 18:39:22 2023" ..$ nNodes : int 327 ..$ nLinks : int 172035 ..$ simple : logi NA $ nodes :'data.frame': 327 obs. of 1 variable: ..$ ID: chr [1:327] "v1" "v2" "v3" "v4" ... $ links :'data.frame': 172035 obs. of 3 variables: ..$ ID: chr [1:172035] "e1" "e2" "e3" "e4" ... ..$ T : int [1:172035] 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 ... ..$ E :List of 172035 .. ..$ : int [1:2] 2 1 .. ..$ : int [1:2] 9 11 .. ..$ : int [1:2] 40 39 .. .. 
[list output truncated] $ data : list() > write(toJSON(HN),"contact-high-school.json") ===== Other datasets ===== - [[.:arb:cook|Cooking]] - [[.:arb:zoo|Zoo]] In R, we can read a hypernets JSON file directly from GitHub: > library(jsonlite) > wdir <- "C:/test/data/hyper" > setwd(wdir) > hfile <- "https://raw.githubusercontent.com/bavla/hypernets/main/data/Cooking.json" > HN <- fromJSON(hfile) > str(HN) Larger hypernets files are zipped. We can read them directly using [[https://search.r-project.org/CRAN/refmans/Hmisc/html/getZip.html|getZip]] from the Hmisc package: > library(jsonlite) > library(Hmisc) > wdir <- "C:/test/data/hyper" > setwd(wdir) > hfile <- "https://raw.githubusercontent.com/bavla/hypernets/main/data/Cooking.zip" > H <- fromJSON(getZip(hfile)) > str(H) ===== To do ===== * hypernet -> two-mode * hypernet -> matrix * hypernet -> simple * two-mode -> hypernet