Austin R. Benson hypernet datasets in R

How to read the data

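Let's look at the contact-high-school dataset. The file DATA-DESCRIPTION.txt describes the hypernets format used in the ARB (Austin R. Benson) datasets and illustrates it with a small example, which we read first. Each dataset consists of three files: the nverts file gives the number of nodes in each link (simplex), the simplices file lists the nodes of all links one after another, and the times file gives the timestamp of each link.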

> wdir <- "C:/Users/vlado/DL/data/hyper"
> setwd(wdir)
> N <- read.csv("./school/contact-high-school/ex-nverts.txt",header=FALSE)
> N
  V1
1  3
2  2
3  4
> E <- read.csv("./school/contact-high-school/ex-simplices.txt",header=FALSE)
> E
  V1
1  1
2  2
3  3
4  2
5  4
6  1
7  3
8  4
9  5
> T <- read.csv("./school/contact-high-school/ex-times.txt",header=FALSE)
> T
  V1
1 10
2 15
3 21
> H <- data.frame(E=rep(NA,3),T=T$V1)
> H
   E  T
1 NA 10
2 NA 15
3 NA 21
> L = vector(mode="list",3)
> L[[1]] <- E$V1[1:3]
> L[[2]] <- E$V1[4:5]
> L[[3]] <- E$V1[6:9]
> H$E <- L
> H
           E  T
1    1, 2, 3 10
2       2, 4 15
3 1, 3, 4, 5 21
> H$E[[1]]
[1] 1 2 3
> H$E[[2]]
[1] 2 4

> m <- nrow(N); lab <- paste("e",1:m,sep="")
> f <- cumsum(N$V1); s <- c(1,1+f[1:(m-1)])
> H <- data.frame(ID=lab,T=T$V1,E=rep(NA,m))
> L = vector(mode="list",m)
> for(i in 1:m) L[[i]] <- E$V1[s[i]:f[i]]
> H$E <- L
> H
  ID  T          E
1 e1 10    1, 2, 3
2 e2 15       2, 4
3 e3 21 1, 3, 4, 5

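JSON format

The toJSON function used at the end of this section comes from the jsonlite package; load it with library(jsonlite) first if it is not already attached.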

> N <- read.csv("./school/contact-high-school/ex-nverts.txt",header=FALSE)
> E <- read.csv("./school/contact-high-school/ex-simplices.txt",header=FALSE)
> T <- read.csv("./school/contact-high-school/ex-times.txt",header=FALSE)
> m <- nrow(N); lab <- paste("e",1:m,sep="")
> f <- cumsum(N$V1); s <- c(1,1+f[1:(m-1)])
> H <- data.frame(ID=lab,T=T$V1,E=rep(NA,m))
> L = vector(mode="list",m)
> for(i in 1:m) L[[i]] <- E$V1[s[i]:f[i]]
> H$E <- L
> n <- max(E$V1); names <- paste("v",1:n,sep="")
> V <- data.frame(ID=names)
> info <- list(network="ex",
+   title="ARB: simple example from DATA-DESCRIPTION.txt",
+   by="Austin R. Benson",
+   href="https://www.cs.cornell.edu/~arb/data/",
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=TRUE )
> HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list())
> str(HN)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 9
  ..$ network: chr "ex"
  ..$ title  : chr "ARB: simple example from DATA-DESCRIPTION.txt"
  ..$ by     : chr "Austin R. Benson"
  ..$ href   : chr "https://www.cs.cornell.edu/~arb/data/"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Wed Oct 18 01:32:14 2023"
  ..$ nNodes : int 5
  ..$ nLinks : int 3
  ..$ simple : logi TRUE
 $ nodes :'data.frame': 5 obs. of  1 variable:
  ..$ ID: chr [1:5] "v1" "v2" "v3" "v4" ...
 $ links :'data.frame': 3 obs. of  3 variables:
  ..$ ID: chr [1:3] "e1" "e2" "e3"
  ..$ T : int [1:3] 10 15 21
  ..$ E :List of 3
  .. ..$ : int [1:3] 1 2 3
  .. ..$ : int [1:2] 2 4
  .. ..$ : int [1:4] 1 3 4 5
 $ data  : list()
> write(toJSON(HN),"ex.json")

The contact-high-school dataset

> library(jsonlite)
> N <- read.csv("./school/contact-high-school/contact-high-school-nverts.txt",header=FALSE)
> E <- read.csv("./school/contact-high-school/contact-high-school-simplices.txt",header=FALSE)
> T <- read.csv("./school/contact-high-school/contact-high-school-times.txt",header=FALSE)
> m <- nrow(N); lab <- paste("e",1:m,sep="")
> f <- cumsum(N$V1); s <- c(1,1+f[1:(m-1)])
> H <- data.frame(ID=lab,T=T$V1,E=rep(NA,m))
> L = vector(mode="list",m)
> for(i in 1:m) L[[i]] <- E$V1[s[i]:f[i]]
> H$E <- L
> head(H)
   ID          T      E
1  e1 1385982020   2, 1
2  e2 1385982020  9, 11
3  e3 1385982020 40, 39
4  e4 1385982020 46, 45
5  e5 1385982020 26, 25
6  e6 1385982020 55, 56
> tail(H)
            ID          T        E
172030 e172030 1386345560 133, 281
172031 e172031 1386345560 258, 205
172032 e172032 1386345580   4, 108
172033 e172033 1386345580 133, 281
172034 e172034 1386345580  161, 84
172035 e172035 1386345580 258, 205
> size <- sapply(H$E,length)
> table(size)
size
     2      3      4      5 
163977   7475    576      7 
> H[which(size==5),]
            ID          T                       E
9266     e9266 1385989560 166, 145, 268, 276, 269
21312   e21312 1385997200      28, 76, 41, 91, 85
21478   e21478 1385997280   163, 77, 58, 127, 245
21945   e21945 1385997480      92, 29, 28, 76, 85
51855   e51855 1386071700   163, 83, 186, 82, 127
74500   e74500 1386144500 166, 285, 145, 144, 276
116649 e116649 1386234740 288, 258, 205, 265, 303
> F <- table(E)
> head(sort(F,decreasing=TRUE),15)
V1
  78   16   95   61   49   77  270  139  301   17   71  126  217    6   60 
4495 4012 3983 3885 3851 3697 3386 3332 3330 3295 3205 3123 3042 2948 2673 
> n <- max(E$V1); names <- paste("v",1:n,sep="")
> V <- data.frame(ID=names)
> info <- list(network="contact-high-school",
+   title="ARB: contact-high-school dataset",
+   by="Rossana Mastrandrea, Julie Fournet, and Alain Barrat",
+   ref="Mastrandrea, etal: Contact Patterns in a High School. PLOS ONE, 2015",
+   href="https://www.cs.cornell.edu/~arb/data/contact-high-school/",
+   creator="V. Batagelj",
+   date=date(),
+   nNodes=n, 
+   nLinks=m,
+   simple=NA )
> HN <- list(format="hypernets",info=info,nodes=V,links=H,data=list())
> str(HN)
List of 5
 $ format: chr "hypernets"
 $ info  :List of 10
  ..$ network: chr "contact-high-school"
  ..$ title  : chr "ARB: contact-high-school dataset"
  ..$ by     : chr "Rossana Mastrandrea, Julie Fournet, and Alain Barrat"
  ..$ ref    : chr "Mastrandrea, etal: Contact Patterns in a High School. PLOS ONE, 2015"
  ..$ href   : chr "https://www.cs.cornell.edu/~arb/data/contact-high-school/"
  ..$ creator: chr "V. Batagelj"
  ..$ date   : chr "Tue Oct 17 18:39:22 2023"
  ..$ nNodes : int 327
  ..$ nLinks : int 172035
  ..$ simple : logi NA
 $ nodes :'data.frame': 327 obs. of  1 variable:
  ..$ ID: chr [1:327] "v1" "v2" "v3" "v4" ...
 $ links :'data.frame': 172035 obs. of  3 variables:
  ..$ ID: chr [1:172035] "e1" "e2" "e3" "e4" ...
  ..$ T : int [1:172035] 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 1385982020 ...
  ..$ E :List of 172035
  .. ..$ : int [1:2] 2 1
  .. ..$ : int [1:2] 9 11
  .. ..$ : int [1:2] 40 39
  .. .. [list output truncated]
 $ data  : list()
> write(toJSON(HN),"contact-high-school.json")

Other datasets

In R, we can read a hypernets JSON file directly from GitHub:

> library(jsonlite)
> wdir <- "C:/test/data/hyper"
> setwd(wdir)
> hfile <- "https://raw.githubusercontent.com/bavla/hypernets/main/data/Cooking.json"
> HN <- fromJSON(hfile)
> str(HN)

Larger hypernets files are zipped. We can read them directly using the getZip function from the Hmisc package:

> library(jsonlite)
> library(Hmisc)
> wdir <- "C:/test/data/hyper"
> setwd(wdir)
> hfile <- "https://raw.githubusercontent.com/bavla/hypernets/main/data/Cooking.zip"
> H <- fromJSON(getZip(hfile))
> str(H)

To do

  • hypernet → two-mode
  • hypernet → matrix
  • hypernet → simple
  • two-mode → hypernet
vlado/work/hn/arb.txt · Last modified: 2023/10/19 03:20 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki