====== Networks ======
[[..:alex|OpenAlex]]; [[..:alex:paj|OpenAlex 2 Pajek]]
===== Functions OpenAlex =====
https://github.com/bavla/OpenAlex/blob/main/code/OpenAlex.R
# OpenAlex
# https://github.com/bavla/OpenAlex/tree/main/code
# http://vladowiki.fmf.uni-lj.si/doku.php?id=vlado:work:bib:alex
# by Vladimir Batagelj, March 2024
# source("https://raw.githubusercontent.com/bavla/OpenAlex/main/code/OpenAlex.R")
# Minimal "dictionary" layer built on hashed environments.

# keys(dict): names stored in a dictionary (plain alias of ls, so sorted).
keys <- ls

# eDict(size): create an empty dictionary; `size` is the initial hash size.
eDict <- function(size=10000L) {
  new.env(hash=TRUE, parent=emptyenv(), size=size)
}

# getVals(x, dict): look up every key in the character vector x; the lookup
# is vectorised over x only, all further arguments are passed to get() as is.
getVals <- Vectorize(FUN=get, vectorize.args="x")
# dict2DF(dict, ind): turn a dictionary into a data frame.
#   dict - environment whose entries are equally named vectors
#   ind  - name of the field holding the running index of the entry
# Returns a data frame with one row per key (row names = keys), the index
# column converted to integer, rows sorted by that index.
dict2DF <- function(dict,ind) {
  entries <- getVals(keys(dict), dict)      # fields x keys matrix
  tab <- as.data.frame(t(entries))          # keys x fields
  idx <- as.integer(unname(tab[[ind]]))
  tab[[ind]] <- idx
  return(tab[order(tab[[ind]]), ])
}
# putWork(Wid, sWname): register a work in the global dictionary `works`
# and return its index.
#   Wid    - work identifier used as the dictionary key
#   sWname - short work name; "" means "name not known (yet)"
# A new key gets the next free index. For a known key an empty stored name
# is filled in; two different non-empty names are reported to the trace
# connection WC$tr. An empty incoming name carries no information and is
# therefore not reported as a conflict (it is what every citation of an
# already registered work passes in).
# Returns the named element "wind" of the stored entry.
putWork <- function(Wid,sWname=""){
  if(exists(Wid,envir=works,inherits=FALSE)){
    known <- unname(works[[Wid]]["sWname"])
    if(sWname!="" && known!=sWname){
      if(known=="") {
        works[[Wid]]["sWname"] <- sWname
      } else {
        cat("W",length(works),known,sWname,"\n",file=WC$tr)
      }
    }
  } else {
    # 1L keeps the index an integer: a double 100000 would be stored in the
    # character vector (and later written to the files) as "1e+05".
    works[[Wid]] <- c(wind=length(works)+1L,sWname=sWname)
  }
  return(works[[Wid]]["wind"])
}
# putSrc(Sid, Sname): register a source (journal, ...) in the global
# dictionary `srces` and return its index.
#   Sid   - source identifier used as the dictionary key
#   Sname - display name of the source; NA means "not known"
# A new key gets the next free index. For a known key a missing stored name
# is filled in; two different known names are reported to WC$tr.
# Missing names are tested with is.na(): comparing with NA by != yields NA
# and makes if() stop with "missing value where TRUE/FALSE needed".
# Returns the named element "sind" of the stored entry.
putSrc <- function(Sid,Sname=NA){
  if(exists(Sid,envir=srces,inherits=FALSE)){
    known <- unname(srces[[Sid]]["Sname"])
    if(is.na(known)) {
      if(!is.na(Sname)) srces[[Sid]]["Sname"] <- Sname
    } else if(!is.na(Sname) && known!=Sname) {
      cat("S",length(srces),known,Sname,"\n",file=WC$tr)
    }
  } else {
    # integer index: avoids "1e+05"-style indices once it is coerced to text
    srces[[Sid]] <- c(sind=length(srces)+1L,Sname=Sname)
  }
  return(srces[[Sid]]["sind"])
}
# putAuth(Aid, Aname): register an author in the global dictionary `auths`
# and return the author's index.
#   Aid   - author identifier used as the dictionary key
#   Aname - display name (a single string); NA or NULL means "not known"
# Each entry holds aind (index), Aname and sAname (short name computed by
# sAname()). For a known key a missing stored name is filled in together
# with its short name; two different known names are reported to WC$tr.
# Missing names are tested with is.na(), because != against NA yields NA
# and makes if() fail.
# Returns the named element "aind" of the stored entry.
putAuth <- function(Aid,Aname=NA){
  if(length(Aname)==0L) Aname <- NA
  sAnam <- if(is.na(Aname)) NA else sAname(Aname)
  if(exists(Aid,envir=auths,inherits=FALSE)){
    known <- unname(auths[[Aid]]["Aname"])
    if(is.na(known)) {
      if(!is.na(Aname)) auths[[Aid]][c("Aname","sAname")] <- c(Aname,sAnam)
    } else if(!is.na(Aname) && known!=Aname) {
      cat("A",length(auths),known,Aname,"\n",file=WC$tr)
    }
  } else {
    # integer index: avoids "1e+05"-style indices once it is coerced to text
    auths[[Aid]] <- c(aind=length(auths)+1L,Aname=Aname,sAname=sAnam)
  }
  return(auths[[Aid]]["aind"])
}
# Two-letter codes for work types, used by Gname() in short work names.
# A type that is not listed here yields NA when looked up.
# NOTE(review): "dissertation" and "dataset" share the code "DS" - confirm
# whether that is intended.
.Ty <- c(
  "article"         = "AR",
  "book-chapter"    = "BC",
  "dissertation"    = "DS",
  "book"            = "BK",
  "dataset"         = "DS",
  "paratext"        = "PT",
  "other"           = "OT",
  "reference-entry" = "RE",
  "report"          = "RP",
  "peer-review"     = "PR",
  "standard"        = "ST",
  "editorial"       = "ED",
  "erratum"         = "ER",
  "grant"           = "GR",
  "letter"          = "LT"
)
# getID(URLid): short OpenAlex key of an entity, e.g.
#   "https://openalex.org/W2741809807" -> "W2741809807".
# Only the leading "https://openalex.org/" is removed, so an id that is
# already short (or uses http://) is returned intact instead of losing its
# first 21 characters. NA stays NA, NULL gives character(0); vectorised.
getID <- function(URLid) sub("^https?://openalex\\.org/","",URLid)
# firstup(n): lower-case every string in n, then capitalise its first
# character ("jOHN" -> "John"). Vectorised over n.
firstup <- function(n) {
  low <- tolower(n)
  head1 <- toupper(substr(low,1,1))
  substr(low,1,1) <- head1
  low
}
# Gname(name, ty, py, vl, fp): short name of a work.
#   name - display name of the first author (words separated by blanks)
#   ty   - work type; "article" selects the volume:page form
#   py   - publication year; vl - volume; fp - first page
# The last word of `name` is taken as the surname (first 8 characters), the
# first letters of the preceding words as initials:
#   article:  Surname_I(py)vl:fp     other types:  Surname_I(py)<.Ty code>
# A one-word name has no initials; it gives just the surname, without the
# separator. (1:(k-1) would count down to c(1,0) for k == 1 and reuse the
# surname's own first letter; for an empty name it would raise a subscript
# error.) seq_len() covers both cases.
Gname <- function(name,ty,py,vl,fp){
  L <- firstup(unlist(strsplit(name," ")))
  L <- L[is.na(L) | nzchar(L)]          # drop empty tokens from repeated blanks
  k <- length(L)
  surname <- if(k>0) substr(L[k],1,8) else ""
  initials <- paste(substr(L[seq_len(max(k-1,0))],1,1),collapse="")
  H <- if(nzchar(initials)) paste(surname,initials,sep="_") else surname
  if(ty=="article") paste(H,"(",py,")",vl,":",fp,sep="") else
    paste(H,"(",py,")",.Ty[ty],sep="")
}
# sAname(name): short author name "Surname I", where the surname is the last
# word of the display name and I are the first letters of the words before
# it ("vladimir batagelj" -> "Batagelj V").
# A one-word name is returned as is, without initials: 1:(k-1) would count
# down to c(1,0) for k == 1 and append the surname's own first letter.
# An empty name gives "".
sAname <- function(name){
  L <- firstup(unlist(strsplit(name," ")))
  L <- L[is.na(L) | nzchar(L)]          # drop empty tokens from repeated blanks
  k <- length(L)
  if(k==0) return("")
  initials <- paste(substr(L[seq_len(k-1)],1,1),collapse="")
  if(nzchar(initials)) paste(L[k],initials) else L[k]
}
# openWorks(query, list, file): set up the global context WC that drives
# nextWork().
#   query - list of OpenAlex query parameters; paging is started only when it
#           contains a "search" entry
#   list  - vector of work ids to be fetched one by one
#   file  - name of an ndjson file with saved works
# Creates the environment WC in the global scope, resets the counters, opens
# the trace file "trace.txt" and selects the initial state WC$act:
# "page" (search given), else "list", else "open", else "stop".
openWorks <- function(query=NULL,list=NULL,file=NULL){
  WC <<- new.env(hash=TRUE,parent=emptyenv())
  ctx <- WC                               # same environment, shorter name
  ctx$works <- "https://api.openalex.org/works"
  ctx$Q <- query
  ctx$L <- list
  ctx$f <- file
  ctx$n <- 0
  ctx$l <- 0
  ctx$m <- 0
  ctx$an <- 0
  ctx$tr <- file("trace.txt","w",encoding="UTF-8")
  cat("% OpenAlex",date(),"\n",file=ctx$tr)
  searching <- length(query[["search"]])>0
  if(searching) {
    ctx$k <- 0
    ctx$nr <- 0
    ctx$act <- "page"
    # default page size, then start cursor paging
    if(length(query[["per_page"]])==0) ctx$Q$per_page <- "200"
    ctx$Q$cursor <- "*"
  } else {
    ctx$act <- if(length(list)>0) "list" else if(length(file)>0) "open" else "stop"
  }
}
# nextWork(): return the next work, or NULL when all inputs are exhausted.
# A small state machine over WC$act (set up by openWorks):
#   "page" - works from the cursor-paged search query WC$Q
#   "list" - works fetched one by one for the ids in WC$L
#   "open" - open the ndjson file WC$f (if any), then continue with "file"
#   "file" - works read line by line from that file
#   "stop" - nothing left, return NULL
# The states are visited in this order. Every `next` (a state change or a
# failed request) uses up one of the five passes of the loop; when all five
# are used without returning a work, the function stops with
# "Too many errors".
# GET and fromJSON are not defined here; the calling script loads httr and
# jsonlite.
nextWork <- function(){
# repeat{
for(t in 1:5){
switch(WC$act,
"page" = {
# if(WC$n==10) {WC$act <- "list"; next}
# k is the position inside the current page of results; a new page is
# requested once it runs past the page size nr
WC$k <- WC$k + 1
if(WC$k>WC$nr){
WC$wd <- GET(WC$works,query=WC$Q)
# a failed request ends the paging; continue with the list of ids
if(WC$wd$status_code!=200) {WC$act <- "list"
cat(WC$n,"GET error\n"); flush.console(); next}
WC$k <- 1
WC$wc <- fromJSON(rawToChar(WC$wd$content))
# a missing next cursor ends the paging; the results of this response
# are not returned
WC$Q$cursor <- WC$wc$meta$next_cursor
if(is.null(WC$Q$cursor)) {WC$act <- "list"; next}
WC$df <- WC$wc$results; WC$nr <- nrow(WC$df)
# cat(WC$k,wc$meta$count,WC$nr,"\n ",WC$Q$cursor,"\n"); flush.console()
}
WC$n <- WC$n + 1
return(WC$df[WC$k,])
},
"list" = {
WC$l <- WC$l + 1
if(WC$l>length(WC$L)) {WC$act <- "open"; next}
# single-work request: <works endpoint>/<id>, restricted to the fields
# named in the query's select entry
works <- paste(WC$works,"/",WC$L[WC$l],sep="")
WC$wd <- GET(works,query=list(select=WC$Q[["select"]]))
# a failed request skips this id (l has already been advanced)
if(WC$wd$status_code!=200) {cat(WC$n,"GET error\n")
flush.console(); next}
# cat(" >>>",WC$l,WC$L[WC$l],"\n"); flush.console()
wc <- fromJSON(rawToChar(WC$wd$content))
WC$n <- WC$n + 1
return(wc)
},
"open" = {
if(is.null(WC$f)) { WC$act <- "stop"; next }
WC$ndj <- file(WC$f,open="r")
WC$act <- "file"; next
},
"file" = {
# one JSON document per line; an empty read means end of file
wc <- readLines(con=WC$ndj,n=1)
if(length(wc)==0){ close(WC$ndj); WC$act <- "stop"; next }
WC$m <- WC$m + 1; WC$n <- WC$n + 1
return(fromJSON(wc))
},
"stop" = { return(NULL) },
# any other state is a programming error
stop(paste0("No handler for ",WC$act))
)
}
stop("Too many errors")
}
# processWork(w): register one work and write its records to the output
# connections.
#   w - a work as returned by nextWork()
# Uses globals that are not defined in this function: the context WC
# (counters, trace connection) and the connections wrk (work records),
# wj (work -> source), ci (work -> cited work) and wa (work -> author),
# which the calling script opens.
processWork <- function(w) {
# cat(" Process:",WC$n,w$title,"\n"); flush.console()
# hit = TRUE marks a work delivered by nextWork(); the works it cites are
# written below with FALSE
Wid <- getID(w$id); hit <- TRUE
Sid <- getID(w$primary_location$source$id)
Sname <- w$primary_location$source$display_name
pYear <- w$publication_year; pDate <- w$publication_date
type <- w$type; lang <- w$language
vol <- w$biblio$volume; iss <- w$biblio$issue
fPage <- w$biblio$first_page; lPage <- w$biblio$last_page
# ";" is the field separator of the work records, so it is replaced in titles
title <- w$title; tit <- gsub(";",",",title)
autsh <- w$authorships[[1]]
# no authorship rows: note it in the trace and invent the name Anon<k>
if(nrow(autsh)==0) { cat("W",WC$n,"no authors info\n",file=WC$tr)
WC$an <- WC$an + 1; fAName <- paste("Anon",WC$an,sep="")
} else { fAName <- w$authorships$author$display_name[1]
# NOTE(review): presumably the length-1 case is a work that arrived as a
# data frame row, where authorships is wrapped in a list - confirm
if(length(w$authorships)==1) fAName <- w$authorships[[1]]$author$display_name[1]}
sWname <- Gname(fAName,type,pYear,vol,fPage)
# u is the index of this work
u <- putWork(Wid,sWname)
# cat(u,Wid,hit,sWname,Sid,pYear,pDate,type,lang,vol,iss,fPage,lPage,fAName,tit,sep=";","\n"); flush.console()
# work -> source link, only when the source id is known
if(!is.na(Sid)) {j <- putSrc(Sid,Sname); cat(u,j,"\n",file=wj)}
cat(u,Wid,hit,sWname,Sid,pYear,pDate,type,lang,vol,iss,fPage,lPage,fAName,tit,
sep=";",file=wrk); cat("\n",file=wrk)
refs <- w$referenced_works
# NOTE(review): same unwrapping as for authorships above - confirm
if(length(w$referenced_works)==1) refs <- w$referenced_works[[1]]
# every cited work is registered without a name, gets a placeholder record
# (hit = FALSE, other fields NA) and an arc u -> v in the citation file
for(wk in refs) {
vid <- getID(wk); v <- putWork(vid,"")
cat(v,vid,FALSE,"",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,sep=";",file=wrk)
cat("\n",file=wrk); cat(u,v,"\n",file=ci) }
# work -> author links; an anonymous author is keyed by the invented name
if(nrow(autsh)==0) {
v <- putAuth(fAName,Aname=fAName); cat(u,v,"\n",file=wa)
} else {
auts <- w$authorships$author
if(is.null(auts)) auts <- w$authorships[[1]]$author
for(a in 1:nrow(auts)) {
Aid <- getID(auts$id[a]); v <- putAuth(Aid,Aname=auts$display_name[a])
cat(u,v,"\n",file=wa) } }
}
# closeWorks(): close the trace connection and remove the context WC
# (searched for in the enclosing environments as well).
closeWorks <- function() {
  trace_con <- WC$tr
  close(trace_con)
  rm(list="WC", inherits=TRUE)
}
===== Program OpenAlex2Pajek =====
https://github.com/bavla/OpenAlex/blob/main/code/OpenAlex2Pajek.R
# OpenAlex2Pajek
# source("OpenAlex2Pajek.R")
library(httr)
library(jsonlite)
source("https://raw.githubusercontent.com/bavla/OpenAlex/main/code/OpenAlex.R")
# source("OpenAlex.R")
# VBlist <- read.table("VladoWorks.csv")$V1
# Query parameters for the OpenAlex works endpoint; the commented lines are
# alternative settings.
Q <- list(
  search = "handball",
  # filter="publication_year:2015",
  select = "id,primary_location,publication_year,publication_date,type,language,biblio,title,authorships,countries_distinct_count,cited_by_count,referenced_works_count,referenced_works",
  # select="id,title,countries_distinct_count,cited_by_count,referenced_works_count",
  per_page = "200"
  # per_page="3"
)

# save: keep a copy of every received work in save.ndjson
# step: a progress line is printed after every `step` works
save <- TRUE
step <- 500
if (save) {
  json <- file("save.ndjson", "w", encoding = "UTF-8")
}
cat("OpenAlex2Pajek - Start", date(), "\n")

# Output connections: citation arcs, work-author and work-source links
# (temporary files) and the table of work records.
ci  <- file("Ci.tmp", "w", encoding = "UTF-8")
wa  <- file("WA.tmp", "w", encoding = "UTF-8")
wj  <- file("WJ.tmp", "w", encoding = "UTF-8")
wrk <- file("works.csv", "w", encoding = "UTF-8")

# Every file starts with a "% OpenAlex <date>" line; works.csv also gets
# the column names.
cat("% OpenAlex", date(), "\n", file = wa)
cat("% OpenAlex", date(), "\n", file = wj)
cat("% OpenAlex", date(), "\n", file = ci)
cat("% OpenAlex", date(), "\n", file = wrk)
cat("ind;Wid;hit;sWname;Sid;pYear;pDate;type;lang;vol;iss;fPage;lPage;fAName;title\n", file = wrk)

# Dictionaries of works, sources and authors.
works <- eDict()
srces <- eDict()
auths <- eDict()

# openWorks(query=Q,list=VBlist,file="manual.ndjson")
# openWorks(query=Q,list=NULL,file="save.ndjson")
# openWorks(query=Q,list=VBlist,file=NULL)
openWorks(query = Q, list = NULL, file = NULL)
# print(ls.str(WC))
cat("*** OpenAlex2Pajek - Start", date(), "\n")
flush.console()
# Main loop: take works from nextWork() until it returns NULL, optionally
# save each one as a JSON line, and process it.
while (!is.null(w <- nextWork())) {
  if (save) write(toJSON(w), file = json)
  if (WC$n %% step == 0) cat(date(), " n =", WC$n, "\n")
  flush.console()
  # tryCatch(
  processWork(w) #,
  # error=function(e){ cat("W",WC$n,w$id,"\n"); flush.console(); print(e)} )
}
cat("*** OpenAlex2Pajek - Stop", date(), "\n")
flush.console()
# print(ls.str(WC))

# Close the output files before they are read back.
close(ci)
close(wa)
close(wj)
close(wrk)
if (save) close(json)

# Summary of the collected data.
cat("hits:", WC$n, "works:", length(works), "authors:", length(auths),
    "anon:", WC$an, "sources:", length(srces), "\n")
# Citation Cite
# Cite.net  - Pajek network on the n works; arcs: citing work -> cited work
# Works.nam - the same vertices labelled by the short work name (the work
#             id is used when the short name is empty or missing)
U <- dict2DF(works,"wind")
n <- nrow(U)
net <- file("Cite.net","w",encoding="UTF-8"); cat('\xEF\xBB\xBF',file=net)
nam <- file("Works.nam","w",encoding="UTF-8"); cat('\xEF\xBB\xBF',file=nam)
cat("% OpenAlex2Pajek",date(),"\n*vertices",n,"\n",file=net)
cat("% OpenAlex2Pajek",date(),"\n*vertices",n,"\n",file=nam)
# The first line of Ci.tmp is the "% OpenAlex" header. A file without any
# arc is handled separately: read.csv would stop with "no lines available".
if(length(readLines("Ci.tmp",n=2L,warn=FALSE))<2L) {
  Ci <- data.frame(V1=integer(0),V2=integer(0))
} else Ci <- read.csv("Ci.tmp",sep="",header=FALSE,skip=1,encoding="UTF-8")
lab <- as.character(U[["sWname"]])
lab <- ifelse(is.na(lab) | lab=="",row.names(U),lab)
cat(paste0(seq_len(n),' "',row.names(U),'"\n'),sep="",file=net)
cat(paste0(seq_len(n),' "',lab,'"\n'),sep="",file=nam)
cat("*arcs\n",file=net)
# as.integer: an endpoint read back as a double (e.g. from "1e+05") would be
# written in scientific notation again
if(nrow(Ci)>0)
  cat(paste0(as.integer(Ci$V1)," ",as.integer(Ci$V2)," \n"),sep="",file=net)
close(net); close(nam); rm(Ci,lab)
# Authorship WA
# WA.net      - two-mode Pajek network: vertices 1..n are the works (n and U
#               come from the citation part above), n+1..n+m the authors;
#               arcs: work -> author
# Authors.nam - the m authors labelled by the short author name (the author
#               key is used when the short name is empty or missing)
A <- dict2DF(auths,"aind")
m <- nrow(A)
net <- file("WA.net","w",encoding="UTF-8"); cat('\xEF\xBB\xBF',file=net)
nam <- file("Authors.nam","w",encoding="UTF-8"); cat('\xEF\xBB\xBF',file=nam)
cat("% OpenAlex2Pajek",date(),"\n*vertices",n+m,n,"\n",file=net)
cat("% OpenAlex2Pajek",date(),"\n*vertices",m,"\n",file=nam)
# The first line of WA.tmp is the "% OpenAlex" header. A file without any
# link is handled separately: read.csv would stop with "no lines available".
if(length(readLines("WA.tmp",n=2L,warn=FALSE))<2L) {
  WA <- data.frame(V1=integer(0),V2=integer(0))
} else WA <- read.csv("WA.tmp",sep="",header=FALSE,skip=1,encoding="UTF-8")
lab <- as.character(A[["sAname"]])
# a missing short name must not become the label "NA"
lab <- ifelse(is.na(lab) | lab=="",row.names(A),lab)
cat(paste0(seq_len(n),' "',row.names(U),'"\n'),sep="",file=net)
cat(paste0(n+seq_len(m),' "',row.names(A),'"\n'),sep="",file=net)
cat(paste0(seq_len(m),' "',lab,'"\n'),sep="",file=nam)
cat("*arcs\n",file=net)
# as.integer: an endpoint read back as a double (e.g. from "1e+05") would be
# written in scientific notation again
if(nrow(WA)>0)
  cat(paste0(as.integer(WA$V1)," ",n+as.integer(WA$V2)," \n"),sep="",file=net)
close(net); close(nam); rm(WA,lab)
# Sources WJ
# WJ.net      - two-mode Pajek network: vertices 1..n are the works (n and U
#               come from the citation part above), n+1..n+m the sources;
#               arcs: work -> source
# Sources.nam - the m sources labelled by the source name (the source key is
#               used when the name is empty or missing)
J <- dict2DF(srces,"sind")
m <- nrow(J)
net <- file("WJ.net","w",encoding="UTF-8"); cat('\xEF\xBB\xBF',file=net)
nam <- file("Sources.nam","w",encoding="UTF-8"); cat('\xEF\xBB\xBF',file=nam)
cat("% OpenAlex2Pajek",date(),"\n*vertices",n+m,n,"\n",file=net)
cat("% OpenAlex2Pajek",date(),"\n*vertices",m,"\n",file=nam)
# The first line of WJ.tmp is the "% OpenAlex" header. A file without any
# link is handled separately: read.csv would stop with "no lines available".
if(length(readLines("WJ.tmp",n=2L,warn=FALSE))<2L) {
  WJ <- data.frame(V1=integer(0),V2=integer(0))
} else WJ <- read.csv("WJ.tmp",sep="",header=FALSE,skip=1,encoding="UTF-8")
lab <- as.character(J[["Sname"]])
# a missing source name must not become the label "NA"
lab <- ifelse(is.na(lab) | lab=="",row.names(J),lab)
cat(paste0(seq_len(n),' "',row.names(U),'"\n'),sep="",file=net)
cat(paste0(n+seq_len(m),' "',row.names(J),'"\n'),sep="",file=net)
cat(paste0(seq_len(m),' "',lab,'"\n'),sep="",file=nam)
cat("*arcs\n",file=net)
# as.integer: an endpoint read back as a double (e.g. from "1e+05") would be
# written in scientific notation again
if(nrow(WJ)>0)
  cat(paste0(as.integer(WJ$V1)," ",n+as.integer(WJ$V2)," \n"),sep="",file=net)
close(net); close(nam); rm(WJ,lab)
# closeWorks()
===== Creating Handball Pajek networks =====
https://github.com/bavla/OpenAlex/raw/main/data/handball.zip
> source("OpenAlex2Pajek.R")
OpenAlex2Pajek - Start Mon Mar 18 05:34:58 2024
*** OpenAlex2Pajek - Start Mon Mar 18 05:34:58 2024
Mon Mar 18 05:35:07 2024 n = 500
Mon Mar 18 05:35:15 2024 n = 1000
Mon Mar 18 05:35:25 2024 n = 1500
Mon Mar 18 05:35:33 2024 n = 2000
Mon Mar 18 05:35:45 2024 n = 2500
Mon Mar 18 05:35:53 2024 n = 3000
...
Mon Mar 18 05:53:38 2024 n = 23500
Mon Mar 18 05:54:04 2024 n = 24000
Mon Mar 18 05:54:33 2024 n = 24500
Mon Mar 18 05:55:02 2024 n = 25000
Mon Mar 18 05:55:51 2024 n = 25500
*** OpenAlex2Pajek - Stop Mon Mar 18 05:56:06 2024
hits: 25861 works: 233471 authors: 52643 anon: 1325 sources: 5510
===== Changes =====
- value val in putDict functions was changed from named vector to a named list (March 19, 2024)
- added partitions pYear, hit, type, lang (version 1. March 22, 2024)
===== To do =====
- add the type of work to the works dictionary (March 22, 2024)
- write vertex numbers without scientific notation: 1e+05 -> 100000; 2e+05 -> 200000
- add keywords network WK
- add countries network WL (L - location)
\\
[[..:alex|OpenAlex]]; [[..:alex:paj|OpenAlex 2 Pajek]]