ENRON

Two partially cleaned versions of the raw data are available: CMU / tgz and UC Berkeley. There is also a version with private information removed: Enron from EDRM. Some preprocessed data are available at David Skillicorn's page.

Networks

Some networks derived from the Enron data are available:

Analyses

igraph/R - Enron

> library(igraph) 
> library(igraphdata)
> help(igraphdata)
> data(package="igraphdata")
> data(enron)
> help(enron)
> arcs <- E(enron)
> length(arcs)
[1] 125409
> arcs[1]
Edge sequence:
    e           
e [1]  25 -> 154
> arcs[125409]
Edge sequence:
         e         
e [125409] 18 -> 18
> nodes <- V(enron)
> length(nodes)
[1] 184
> enron
IGRAPH D--- 184 125409 -- Enron email network
+ attr: LDC_names (g/c), LDC_desc (g/c), name (g/c), Citation (g/c),
| Email (v/c), Name (v/c), Note (v/c), Time (e/c), Reciptype (e/c),
| Topic (e/n), LDC_topic (e/n)
+ edges:
 [1]  25->154  25->154  30-> 30  30-> 30  30-> 30  30-> 30  39-> 39  52-> 67
 [9]  52-> 67  52-> 67  52-> 67  61->100  61->100  61->163  61->163  61->166
[17]  61->166  61->170  64-> 59  64-> 59  64-> 64  64-> 64  64->147  64->147
[25]  64->164  64->164  64->168  66-> 66  66-> 66  67->129  67->129  67->129
[33]  67->129  93-> 10  93-> 10  93-> 10  93-> 10  93-> 39  93-> 39  93-> 93
[41]  93-> 93  93-> 93  93-> 93  93->124  93->124 100-> 61 100-> 61 115->115
+ ... omitted several edges 
> enron$name
[1] "Enron email network"
> vertex_attr_names(enron)
[1] "Email" "Name"  "Note" 
> edge_attr_names(enron)
[1] "Time"      "Reciptype" "Topic"     "LDC_topic"
> graph_attr_names(enron)
[1] "LDC_names" "LDC_desc"  "name"      "Citation" 
> vertex_attr(enron,'Name', index = c(1))
[1] "Albert Meyers"
> vertex_attr(enron,'Name', index = c(2))
[1] "Thomas Martin"
> vertex_attr(enron,'Email', index = c(1,2))
[1] "albert.meyers" "a..martin"    
> vertex_attr(enron,'Note', index = c(1,2))
[1] "Employee, Specialist" "Vice President"      
> edge_attr(enron,'Time', index = 1:3)
[1] "1979-12-31 21:00:00" "1979-12-31 21:00:00" "1979-12-31 21:00:00"
> edge_attr(enron,'Topic', index = 1:3)
[1] 1 1 3
> edge_attr(enron,'LDC_topic', index = 1:3)
[1]  0 -1 -1 
> edge_attr(enron,'Reciptype', index = 1:3)
[1] "to" "to" "cc"
> graph_attr(enron,'LDC_names')
 [1] "Calif_analysis"       "Calif_bankruptcy"     "Calif_utilities"     
 [4] "Calif_crisis_legal"   "Calif_enron"          "Calif_federal"       
 [7] "Newsfeed_Calif"       "Calif_legis"          "Daily_business"      
[10] "Educational"          "EnronOnline"          "Kitchen_daily"       
[13] "Kitchen_fortune"      "Energy_newsfeed"      "General_newsfeed"    
[16] "Downfall"             "Downfall_newsfeed"    "Broadband"           
[19] "Federal_gov"          "FERC_DOE"             "College Football"    
[22] "Pro Football"         "India_General"        "India_Dabhol"        
[25] "Nine_eleven"          "Nine_Eleven_Analysis" "Dynegy"              
[28] "Sempra"               "Duke"                 "El Paso"             
[31] "Pipelines"            "World_energy"        
> a <- arcs[1]
> a
+ 1/125409 edge:
[1] 25->154
> ends(enron,a)
     [,1] [,2]
[1,]   25  154
> 

iGraph -> Pajek

# Export the igraphdata Enron e-mail network to two Pajek files:
#   enron.net     - vertex list (labelled with the Name attribute), one
#                   "*arcs :k" relation header per LDC_topic level, and one
#                   arc line per e-mail edge carrying its time offset and
#                   recipient type;
#   enronNote.clu - one class number per vertex, taken from the Note attribute.
library(igraph)
library(igraphdata)
data(enron)

# Convert "YYYY-mm-dd HH:MM:SS" strings to minutes since the Unix epoch.
# Works on whole character vectors.
# NOTE(review): no tz is given, so the strings are parsed in the session's
# time zone; offsets that span a DST change may therefore depend on the
# machine running the script - confirm whether a fixed tz is wanted.
minutes <- function(d) {
  as.numeric(as.POSIXct(d, format = "%Y-%m-%d %H:%M:%OS")) / 60
}

# Output directory. The files are addressed through file.path() instead of
# setwd(), so the session's working directory is left untouched.
out_dir <- "D:/Data/Enron/iGraph"

nodes <- V(enron)
arcs <- E(enron)
# Named edge_times rather than T, which would mask the TRUE shorthand.
edge_times <- arcs$Time
n <- length(nodes)
m <- length(arcs)

# Factor codes double as Pajek class / relation numbers.
notes <- factor(nodes$Note)
fNotes <- levels(notes)
rTypes <- factor(arcs$Reciptype)
fTypes <- levels(rTypes)
# topics / fTopics are not used by the export below.
topics <- factor(arcs$Topic)
fTopics <- levels(topics)
tops <- factor(arcs$LDC_topic)
fTops <- levels(tops)

net <- file(file.path(out_dir, "enron.net"), "w")
clu <- tryCatch(
  file(file.path(out_dir, "enronNote.clu"), "w"),
  error = function(e) {
    close(net)
    stop(e)
  }
)

# Everything that writes runs inside tryCatch() so both connections are
# closed even when a write fails part-way through.
tryCatch({
  # --- enronNote.clu: legend comment line, then one class code per vertex ---
  cat("%", file = clu)
  cat(paste0("  ", seq_along(fNotes), " ", fNotes), sep = "", file = clu)
  cat("\n*vertices ", n, "\n", sep = "", file = clu)
  cat(paste0(as.integer(notes), " \n"), sep = "", file = clu)

  # --- enron.net: vertices ---
  vertex_names <- nodes$Name
  cat("*vertices ", n, "\n", sep = "", file = net)
  cat(paste0(seq_len(n), ' "', vertex_names, '"\n'), sep = "", file = net)

  # Comment line listing the recipient-type codes.
  cat("% Types:", file = net)
  cat(paste0("  ", seq_along(fTypes), " ", fTypes), sep = "", file = net)
  cat("\n", file = net)

  # One relation header per LDC_topic level, labelled from tNames.
  # NOTE(review): this assumes the two smallest LDC_topic levels are the
  # "no match" and "outlier" codes, in that order - confirm with help(enron).
  tNames <- c("no match", "outlier", graph_attr(enron, "LDC_names"))
  relation_ids <- seq_along(fTops)
  cat(paste0("*arcs :", relation_ids, ' "', tNames[relation_ids], '"\n'),
      sep = "", file = net)

  # --- enron.net: arcs ---
  cat("*arcs\n", file = net)
  X <- ends(enron, arcs)
  start <- "1979-12-31 20:00:00"
  s <- minutes(start)

  # Hoisted out of the loop: parse all timestamps and look up all codes once
  # instead of once per edge.
  offsets <- minutes(edge_times) - s
  topic_codes <- as.integer(tops)
  type_labels <- fTypes[as.integer(rTypes)]

  # Line layout: <relation>: <from> <to> 1 [<minutes after start>] l "<type>".
  # The per-edge cat() is kept on purpose: cat() formats each number on its
  # own, so the written text matches what the element-wise loop produced.
  for (e in seq_len(m)) {
    cat(topic_codes[e], ": ", X[e, 1], " ", X[e, 2], " 1 [", offsets[e],
        '] l "', type_labels[e], '"\n', sep = "", file = net)
  }
}, finally = {
  close(net)
  close(clu)
})

enron184.zip

http://kateto.net/networks-r-igraph

notes/data/enron.txt · Last modified: 2016/09/14 04:56 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki