====== ENRON ======
There are two partially cleaned versions of the raw data: [[https://www.cs.cmu.edu/~./enron/|CMU]] / [[http://www.cs.cmu.edu/~enron/enron_mail_20150507.tgz|tgz]] and [[http://bailando.sims.berkeley.edu/enron_email.html|UC Berkeley]]. There is also a version with private information removed, [[http://info.nuix.com/Enron.html|Enron]], from [[http://www.edrm.net/resources/data-sets/edrm-enron-email-data-set|EDRM]]. Some preprocessed data are available on [[http://research.cs.queensu.ca/home/skill/otherforms.html|David Skillicorn]]'s page.
===== Networks =====
Some networks derived from the Enron data are available:
* [[http://konect.uni-koblenz.de/networks/enron|Konect]] - labels ?
* [[http://snap.stanford.edu/data/email-Enron.html|Snap]] - no labels
* Conrad Lee: [[http://sociograph.blogspot.si/2011/04/communication-networks-part-1-enron-e.html|Enron]] (see **Download**)
* Y. Park: [[http://cis.jhu.edu/~parky/Enron/|Scan Statistics on Enron Graphs]] - 184 nodes, temporal
* Sujit Pal: [[https://github.com/sujitpal/mlia-examples/tree/master/src/enron_network|Enron network / GitHub]] - nodes labeled with e-mail, arcs weighted with frequency
* [[https://cran.r-project.org/web/packages/igraphdata/igraphdata.pdf|igraph / R - Enron]] - limited to 184 nodes
* [[http://odds.cs.stonybrook.edu/enroninc-dataset/|EnronInc]]
===== Analyses =====
* [[http://homes.cs.washington.edu/~jheer/|Jeffrey M. Heer]], [[http://hci.stanford.edu/~jheer/projects/enron/v1/|Enron 1]], [[http://homes.cs.washington.edu/~jheer//projects/enron/|Enron 2]]
* Damon Wade: [[https://www.linkedin.com/pulse/visualizing-email-networks-damon-wade|Visualizing Email Networks]]
* Philip Starritt: [[http://www.philipstarritt.com/enron|Data Mining the Enron Email Dataset]]
* Sujit Pal: [[http://sujitpal.blogspot.si/2013/11/using-graph-centrality-metrics-for.html|Using Graph Centrality Metrics for Crime Fighting]]
===== igraph/R - Enron =====
> library(igraph)
> library(igraphdata)
> help(igraphdata)
> data(package="igraphdata")
> data(enron)
> help(enron)
> arcs <- E(enron)
> length(arcs)
[1] 125409
> arcs[1]
Edge sequence:
e
e [1] 25 -> 154
> arcs[125409]
Edge sequence:
e
e [125409] 18 -> 18
> nodes <- V(enron)
> length(nodes)
[1] 184
> enron
IGRAPH D--- 184 125409 -- Enron email network
+ attr: LDC_names (g/c), LDC_desc (g/c), name (g/c), Citation (g/c),
| Email (v/c), Name (v/c), Note (v/c), Time (e/c), Reciptype (e/c),
| Topic (e/n), LDC_topic (e/n)
+ edges:
[1] 25->154 25->154 30-> 30 30-> 30 30-> 30 30-> 30 39-> 39 52-> 67
[9] 52-> 67 52-> 67 52-> 67 61->100 61->100 61->163 61->163 61->166
[17] 61->166 61->170 64-> 59 64-> 59 64-> 64 64-> 64 64->147 64->147
[25] 64->164 64->164 64->168 66-> 66 66-> 66 67->129 67->129 67->129
[33] 67->129 93-> 10 93-> 10 93-> 10 93-> 10 93-> 39 93-> 39 93-> 93
[41] 93-> 93 93-> 93 93-> 93 93->124 93->124 100-> 61 100-> 61 115->115
+ ... omitted several edges
> enron$name
[1] "Enron email network"
> vertex_attr_names(enron)
[1] "Email" "Name" "Note"
> edge_attr_names(enron)
[1] "Time" "Reciptype" "Topic" "LDC_topic"
> graph_attr_names(enron)
[1] "LDC_names" "LDC_desc" "name" "Citation"
> vertex_attr(enron,'Name', index = c(1))
[1] "Albert Meyers"
> vertex_attr(enron,'Name', index = c(2))
[1] "Thomas Martin"
> vertex_attr(enron,'Email', index = c(1,2))
[1] "albert.meyers" "a..martin"
> vertex_attr(enron,'Note', index = c(1,2))
[1] "Employee, Specialist" "Vice President"
> edge_attr(enron,'Time', index = 1:3)
[1] "1979-12-31 21:00:00" "1979-12-31 21:00:00" "1979-12-31 21:00:00"
> edge_attr(enron,'Topic', index = 1:3)
[1] 1 1 3
> edge_attr(enron,'LDC_topic', index = 1:3)
[1] 0 -1 -1
> edge_attr(enron,'Reciptype', index = 1:3)
[1] "to" "to" "cc"
> graph_attr(enron,'LDC_names')
[1] "Calif_analysis" "Calif_bankruptcy" "Calif_utilities"
[4] "Calif_crisis_legal" "Calif_enron" "Calif_federal"
[7] "Newsfeed_Calif" "Calif_legis" "Daily_business"
[10] "Educational" "EnronOnline" "Kitchen_daily"
[13] "Kitchen_fortune" "Energy_newsfeed" "General_newsfeed"
[16] "Downfall" "Downfall_newsfeed" "Broadband"
[19] "Federal_gov" "FERC_DOE" "College Football"
[22] "Pro Football" "India_General" "India_Dabhol"
[25] "Nine_eleven" "Nine_Eleven_Analysis" "Dynegy"
[28] "Sempra" "Duke" "El Paso"
[31] "Pipelines" "World_energy"
> a <- arcs[1]
> a
+ 1/125409 edge:
[1] 25->154
> ends(enron,a)
[,1] [,2]
[1,] 25 154
>
==== iGraph -> Pajek ====
# Export the igraphdata Enron graph to two Pajek files:
#   enron.net     - vertices labelled with Name, arcs grouped into one relation
#                   per LDC_topic level, each arc carrying its time (minutes
#                   after `start`) and its recipient type as a label;
#   enronNote.clu - partition of the vertices by their Note value.
library(igraph)
library(igraphdata)
data(enron)

# Output folder. Full paths are built with file.path(), so the working
# directory of the R session is left untouched.
out_dir <- "D:/Data/Enron/iGraph"

# Convert "YYYY-MM-DD HH:MM:SS" strings to minutes since the epoch.
# The strings carry no zone, so they are parsed as UTC: the differences
# computed below then no longer depend on the time zone and daylight-saving
# rules of the machine that runs the script.
minutes <- function(d) {
  as.numeric(as.POSIXct(d, format = "%Y-%m-%d %H:%M:%OS", tz = "UTC")) / 60
}

nodes <- V(enron)
arcs <- E(enron)
times <- arcs$Time  # not named `T`, which would mask the TRUE shorthand
n <- length(nodes)
m <- length(arcs)

# Factors supply both the integer codes and the legend (levels) written out.
notes <- factor(nodes$Note)
fNotes <- levels(notes)
rTypes <- factor(arcs$Reciptype)
fTypes <- levels(rTypes)
topics <- factor(arcs$Topic)
fTopics <- levels(topics)
tops <- factor(arcs$LDC_topic)
fTops <- levels(tops)

# ---- enronNote.clu: legend comment, header, one class code per vertex ----
clu_lines <- c(
  paste0(
    "%",
    paste0(sprintf("  %d %s", seq_along(fNotes), fNotes), collapse = "")
  ),
  paste0("*vertices ", n),
  sprintf("%d ", as.integer(notes))
)
writeLines(clu_lines, file.path(out_dir, "enronNote.clu"))

# ---- enron.net -----------------------------------------------------------
# Relation names: the first two LDC_topic levels are labelled 'no match' and
# 'outlier', the remaining ones take their names from LDC_names.
tNames <- c("no match", "outlier", graph_attr(enron, "LDC_names"))
net_header <- c(
  paste0("*vertices ", n),
  sprintf('%d "%s"', seq_len(n), nodes$Name),
  paste0(
    "% Types:",
    paste0(sprintf(" %d %s", seq_along(fTypes), fTypes), collapse = "")
  ),
  sprintf('*arcs :%d "%s"', seq_along(fTops), tNames[seq_along(fTops)]),
  "*arcs"
)

# Per-arc columns are computed once, vectorised, instead of once per line.
X <- ends(enron, arcs)
start <- "1979-12-31 20:00:00"
s <- minutes(start)
offset <- minutes(times) - s
relation <- as.integer(tops)
recip <- fTypes[as.integer(rTypes)]

net <- file(file.path(out_dir, "enron.net"), "w")
tryCatch(
  {
    writeLines(net_header, net)
    # cat() is kept for the arc lines on purpose: it formats every number on
    # its own, print-style, so the time values are written as before.
    for (a in seq_len(m)) {
      cat(relation[a], ": ", X[a, 1], " ", X[a, 2], " 1 [", offset[a],
        '] l "', recip[a], '"\n',
        sep = "", file = net
      )
    }
  },
  finally = close(net)  # release the connection even if a write fails
)
{{pajek:data:zip:enron184.zip}}
http://kateto.net/networks-r-igraph