Yearly data about crime in Chicago are available at (1). On January 28, 2023, I downloaded the file Crimes_-_One_year_prior_to_present.csv from (2). Note that the map of wards changed recently.
A very general approach to producing a multiway network is to take a data frame with several nominal (or ordinal) variables and select the ways among them. Afterward, we transform it into the corresponding multiway network. For this task, I wrote the function DF2MWN.
# Build a multiway network (an "MWnets" list) from a data frame.
#
# Arguments:
#   DF       data frame with one row per link.
#   ways     character vector naming the columns of DF used as ways. Each
#            column is converted to a factor; its levels become the nodes of
#            that way and its integer codes become the link endpoints.
#   w        accepted for interface compatibility; not used by this function.
#   network  short network name stored in info$network.
#   title    network title stored in info$title.
#
# Returns a list with components format ("MWnets"), info, ways, nodes, links
# and data. links has a first column `one` (all 1) followed by one integer
# column per way indexing into nodes[[way]]$ID.
DF2MWN <- function(DF, ways, w = NULL, network = "test", title = "Test") {
  absent <- setdiff(ways, names(DF))
  if (length(absent) > 0) {
    stop("ways not found in DF: ", paste(absent, collapse = ", "), call. = FALSE)
  }
  info <- list(network = network, title = title, by = "DF2MWN", date = date())
  Ways <- as.list(ways)
  names(Ways) <- ways
  data <- list()
  nodes <- list()
  # Size the link table from the DF argument rather than from a global data
  # frame, so the function works for any input.
  links <- data.frame(one = rep(1, nrow(DF)))
  for (way in ways) {
    q <- factor(DF[[way]])
    nodes[[way]] <- data.frame(ID = levels(q))
    links[[way]] <- as.integer(q)
  }
  list(
    format = "MWnets", info = info, ways = Ways,
    nodes = nodes, links = links, data = data
  )
}
The function DF2MWN is available in the library MWnets.
# Load the Chicago crime CSV and build the basic six-way network from it.
setwd("C:/Users/vlado/DL/data/multi/chicago")
library(jsonlite)
library(Polychrome)
library(magrittr)
source("https://raw.githubusercontent.com/bavla/ibm3m/master/multiway/MWnets.R")

# The file's own header row is skipped; column names are assigned below.
CH <- read.table(
  "Crimes_-_One_year_prior_to_present.csv",
  sep = ",", header = FALSE, skip = 1, fill = TRUE
)
dim(CH)
# [1] 239616     17
colnames(CH) <- c(
  "case", "date", "block", "iucr", "primary", "secondary", "location",
  "arrest", "domestic", "beat", "ward", "fbi", "x", "y", "lat", "lon", "loc"
)
head(CH)

MT <- DF2MWN(
  CH,
  c("primary", "location", "arrest", "domestic", "ward", "fbi"),
  network = "ChicagoCrime22",
  title = "City of Chicago incidents of crime 2022"
)
str(MT)
# List of 6
#  $ format: chr "MWnets"
#  $ info  :List of 4
#   ..$ network: chr "ChicagoCrime22"
#   ..$ title  : chr "City of Chicago incidents of crime 2022"
#   ..$ by     : chr "DF2MWN"
#   ..$ date   : chr "Mon Jan 30 04:02:07 2023"
#  $ ways  :List of 6
#   ..$ primary : chr "primary"
#   ..$ location: chr "location"
#   ..$ arrest  : chr "arrest"
#   ..$ domestic: chr "domestic"
#   ..$ ward    : chr "ward"
#   ..$ fbi     : chr "fbi"
#  $ nodes :List of 6
#   ..$ primary :'data.frame': 31 obs. of 1 variable:
#   .. ..$ ID: chr [1:31] "ARSON" "ASSAULT" "BATTERY" "BURGLARY" ...
#   ..$ location:'data.frame': 135 obs. of 1 variable:
#   .. ..$ ID: chr [1:135] "" "ABANDONED BUILDING" "AIRCRAFT" "AIRPORT BUILDING NON-TERMINAL - NON-SECURE AREA" ...
#   ..$ arrest  :'data.frame': 2 obs. of 1 variable:
#   .. ..$ ID: chr [1:2] "N" "Y"
#   ..$ domestic:'data.frame': 2 obs. of 1 variable:
#   .. ..$ ID: chr [1:2] "N" "Y"
#   ..$ ward    :'data.frame': 50 obs. of 1 variable:
#   .. ..$ ID: chr [1:50] "1" "2" "3" "4" ...
#   ..$ fbi     :'data.frame': 26 obs. of 1 variable:
#   .. ..$ ID: chr [1:26] "01A" "01B" "02" "03" ...
#  $ links :'data.frame': 239616 obs. of 7 variables:
#   ..$ one     : num [1:239616] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ primary : int [1:239616] 30 9 30 30 30 30 30 30 30 6 ...
#   ..$ location: int [1:239616] 120 103 27 105 33 19 120 62 120 64 ...
#   ..$ arrest  : int [1:239616] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ domestic: int [1:239616] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ ward    : int [1:239616] 27 39 42 49 42 27 43 25 50 5 ...
#   ..$ fbi     : int [1:239616] 8 14 8 8 8 8 8 8 8 17 ...
#  $ data  : list()
Some variables may need special treatment, such as the date/time, which we would like to transform into hours.
# https://en.wikipedia.org/wiki/12-hour_clock
# Convert time stamps of the form "MM/DD/YYYY hh:mm:ss AM" (or "... PM") to
# the hour of day on the 24-hour clock (0-23).
#
# Arguments:
#   s  character vector of time stamps (a single string works as before).
#
# Returns a numeric vector with one hour per element of s; elements that do
# not have the expected three space-separated fields yield NA.
hour <- function(s) {
  # Split every time stamp separately so a whole vector can be converted in
  # one call instead of only its first element.
  fields <- strsplit(as.character(s), " ", fixed = TRUE)
  vapply(fields, function(f) {
    hh <- as.integer(strsplit(f[2], ":", fixed = TRUE)[[1]][1])
    # 12 AM -> 0 and 12 PM -> 12: reduce modulo 12, then add 12 for PM.
    hh %% 12 + 12 * (f[3] == "PM")
  }, numeric(1))
}
sapply(CH$date[1:20], hour)
# 01/29/2022 11:55:00 PM 01/29/2022 12:02:00 PM 01/29/2022 07:35:00 PM 03/10/2022 06:20:00 PM
#                     23                     12                     19                     18
# 03/12/2022 01:45:00 PM 01/29/2022 07:50:00 PM 01/25/2022 09:00:00 AM 08/07/2022 08:05:00 PM
#                     13                     19                      9                     20
# 02/21/2022 03:00:00 PM 03/12/2022 11:00:00 PM 09/06/2022 12:00:00 PM 01/25/2022 12:36:00 AM
#                     15                     23                     12                      0
# 03/24/2022 04:27:00 AM 03/13/2022 04:45:00 PM 11/03/2022 08:30:00 AM 02/22/2022 06:00:00 PM
#                      4                     16                      8                     18
# 03/23/2022 02:35:00 PM 03/15/2022 07:30:00 PM 05/13/2022 09:30:00 PM 03/13/2022 02:30:00 PM
#                     14                     19                     21                     14
We add such weights separately. We can also add some additional data or metadata.
# Attach the hour of day as an extra link weight, record the data source in
# the network metadata, and save the basic (non-aggregated) network.
h <- sapply(CH$date, hour)
MT$links[["hour"]] <- h
MT$info[["URL"]] <- "https://www.chicago.gov/city/en/dataset/crime.html"
# The first links column is the unit weight `one`; rename it to `w`.
colnames(MT$links)[1] <- "w"
write(toJSON(MT), file = "ChicagoCrime0.json")
The basic multiway network constructed in the previous step can still have multiple links. To get a simple network we have to aggregate it. In this process, we can also omit some ways.
# Aggregate the links over the ways primary x location x ward (summing the
# weight w), keep only those three ways and their node tables, and save the
# resulting simple network.
ways <- c("primary", "location", "ward")
Ways <- setNames(as.list(ways), ways)
L <- MT$links
# Grouping by the named columns yields columns primary, location, ward, w.
Links <- aggregate(L["w"], by = L[ways], FUN = sum)
MT$links <- Links
MT$ways <- Ways
MT$nodes <- MT$nodes[ways]
MT$nodes$ward$long <- paste("ward", MT$nodes$ward$ID)
str(MT)
# List of 6
#  $ format: chr "MWnets"
#  $ info  :List of 5
#   ..$ network: chr "ChicagoCrime22"
#   ..$ title  : chr "City of Chicago incidents of crime 2022"
#   ..$ by     : chr "DF2MWN"
#   ..$ date   : chr "Mon Jan 30 04:02:07 2023"
#   ..$ URL    : chr "https://www.chicago.gov/city/en/dataset/crime.html"
#  $ ways  :List of 3
#   ..$ primary : chr "primary"
#   ..$ location: chr "location"
#   ..$ ward    : chr "ward"
#  $ nodes :List of 3
#   ..$ primary :'data.frame': 31 obs. of 1 variable:
#   .. ..$ ID: chr [1:31] "ARSON" "ASSAULT" "BATTERY" "BURGLARY" ...
#   ..$ location:'data.frame': 135 obs. of 1 variable:
#   .. ..$ ID: chr [1:135] "" "ABANDONED BUILDING" "AIRCRAFT" "AIRPORT BUILDING NON-TERMINAL - NON-SECURE AREA" ...
#   ..$ ward    :'data.frame': 50 obs. of 2 variables:
#   .. ..$ ID  : chr [1:50] "1" "2" "3" "4" ...
#   .. ..$ long: chr [1:50] "ward 1" "ward 2" "ward 3" "ward 4" ...
#  $ links :'data.frame': 18598 obs. of 4 variables:
#   ..$ primary : int [1:18598] 9 6 27 30 1 2 3 6 8 9 ...
#   ..$ location: int [1:18598] 1 2 2 2 17 17 17 17 17 17 ...
#   ..$ ward    : int [1:18598] 1 1 1 1 1 1 1 1 1 1 ...
#   ..$ w       : num [1:18598] 20 1 1 1 1 12 8 22 2 2 ...
#  $ data  : list()
write(toJSON(MT), file = "ChicagoCrime1.json")
January 30, 2023