Equivalent journal names

Equivalent journal names

First attempt

October 25, 2018

We decided to manually inspect all journals with at least one of their names cited at least 200 times.

In Pajek we computed the 2-mode network Cite*WJnc and determined the vector wIndegJ.vec with weighted indegrees for journals. We produced the list of candidates for inspection with the following program code in R:

# Switch to the directory holding the network files.
wdir <- "C:/Users/batagelj/work/Python/WoS/SocNet/2018/WoS"
setwd(wdir)

# nj rows are read from WJnc.txt, starting after its first jf + 1 lines.
jf <- 1297133
jt <- 1367558
nj <- jt - jf
A <- read.table(
  "WJnc.txt",
  skip = jf + 1,
  nrows = nj,
  stringsAsFactors = FALSE
)

# V: first column (written out later as the second field of each listed line);
# T: second column (the journal names).
V <- A$V1
T <- A$V2
# First character of each element of x; an empty string is mapped to "€".
firstCh <- function(x) {
  ifelse(nchar(x) > 0, substr(x, 1, 1), "€")
}

# First two characters of each element of x (fewer if the element is shorter).
twoCh <- function(x) {
  substr(x, 1, 2)
}

# Key built from the first character of every space-separated word of the
# single string x, e.g. "AM J SOCIOL" -> "AJS".
firstChs <- function(x) {
  words <- strsplit(x, " ")[[1]]
  paste(firstCh(words), collapse = "")
}

# Key built from the first two characters of every space-separated word of the
# single string x, e.g. "AM J SOCIOL" -> "AMJSO".
twoChs <- function(x) {
  words <- strsplit(x, " ")[[1]]
  paste(twoCh(words), collapse = "")
}
# Two abbreviation keys per journal name: one-letter and two-letter initials.
K1 <- sapply(T, firstChs)
K2 <- sapply(T, twoChs)
J <- data.frame(i = 1:length(T), V, one = K1, two = K2)

# Combined key "K1:K2" and the permutation k that sorts the names by it.
K <- paste(K1, K2, sep = ":")
k <- order(K)

# Copies in key order. Ko and K1o carry one extra trailing "sentinel" element,
# so they have nr = nj + 1 entries; the other vectors keep nj entries.
Ko <- c(K[k], "sentinel")
K1o <- c(as.vector(K1[k]), "sentinel")
K2o <- as.character(K2[k])
Ino <- V[k]
To <- as.character(T[k])
nr <- nj + 1

# Values from wIndegJ.vec (its first line is skipped); F[r] is compared below
# against the entry stored for key K[r].
F <- read.table("wIndegJ.vec", skip = 1, stringsAsFactors = FALSE)$V1

# code maps every key to list(i, n): n is the largest F value seen among the
# rows with that key and i is the row where it occurred (i = 0, n = 0 if no
# row of the key has a positive value).
code <- new.env(hash = TRUE, parent = emptyenv())
for (r in 1:nj) {
  if (!exists(K[r], envir = code, inherits = FALSE)) {
    assign(K[r], list(i = 0, n = 0), envir = code)
  }
  L <- get(K[r], envir = code, inherits = FALSE)
  if (F[r] > L$n) {
    L$i <- r
    L$n <- F[r]
    assign(K[r], L, envir = code)
  }
}

# Write the inspection candidates to journalKord.txt.
#
# Rows are scanned in key order (Ko). Row r - 1 is written when it belongs to
# a run of at least two consecutive rows sharing one key and the largest count
# stored for that key in `code` is at least 200. Each output line holds the
# position in key order, the Ino value, both keys and the name, separated by
# spaces.

# The extra "sentinel" element at the end of Ko needs its own entry in `code`;
# it is built explicitly so this step does not depend on a leftover `L`.
assign("sentinel", list(i = nr, n = 0), envir = code)

jour <- file("journalKord.txt", "w")
key <- "****"   # key of the run currently being scanned
list <- FALSE   # TRUE once the current run has started to be written
# finally = close(jour) releases the connection even if the loop fails.
tryCatch(
  for (r in 1:nr) {
    L <- get(Ko[r], envir = code, inherits = FALSE)
    if (Ko[r] == key) {
      # Row r continues the run, so row r - 1 has at least one equivalent.
      if (L$n >= 200) {
        cat(r - 1, Ino[r - 1], K1o[r - 1], K2o[r - 1], To[r - 1], "\n",
            file = jour)
        list <- TRUE
      }
    } else {
      # Key changes at row r: row r - 1 closes the previous run. The sentinel
      # row forces this branch for the very last run.
      if (list) {
        cat(r - 1, Ino[r - 1], K1o[r - 1], K2o[r - 1], To[r - 1], "\n",
            file = jour)
      }
      list <- FALSE
      key <- Ko[r]
    }
  },
  finally = close(jour)
)

We obtained a list with 5482 lines.

1105 1297475 A AD ADDICTION 
1106 1297607 A AD ADOLESCENCE 
1107 1300863 A AD ADDICTION.S 
1108 1304403 A AD ADVANCES 
1109 1307263 A AD ADDICTIONS 
1110 1310173 A AD ADVOCATE 
...
3922 1297535 AJA AMJAD AM J ADDICTION 
3923 1300898 AJA AMJAD AMER J ADDICTION 
4071 1297402 AJCP AMJCOPS AM J COMMUN PSYCHOL 
4072 1297538 AJCP AMJCOPS AMER J COMMUN PSYCHOL 
4073 1308824 AJCP AMJCOPS AM J COMMUNITY PSYCO 
4074 1309864 AJCP AMJCOPS AM J COMMON PSYCHOL 
4075 1309882 AJCP AMJCOPS AMER J COMMUNITY PSY 
4076 1310488 AJCP AMJCOPS AM J COMMUNITY PSYCH 
4077 1310569 AJCP AMJCOPS AM J COMM PSYCHOL 
4078 1356083 AJCP AMJCOPS AM J COMMUNITY PSYCHOL 
4093 1297553 AJDAA AMJDRALAB AM J DRUG ALCOHOL AB 
4094 1300210 AJDAA AMJDRALAB AMER J DRUG ALCOHOL ABUSE 
...
69505 1312465 WPC WIPECO WIRELESS PERS COMMUN 
69506 1358467 WPC WIPECO WIRELESS PERSONAL COMMUNICATIONS 
69975 1299724 YS YOSO YOUTH SOC 
69976 1307291 YS YOSO YOUTH SOCIALIZATION

If we change the threshold from 200 to 500 we get a shorter list JournalK500.txt with 3148 lines.

Additional filter

October 27, 2018

To additionally reduce the number of titles to inspect, we decided to consider only titles that appeared in at least 3 citations.

> code <- new.env(hash=TRUE,parent=emptyenv())  # fresh table: key -> list(i, n, m)
> for(r in 1:nj){
+   if(!exists(K[r],env=code,inherits=FALSE)) assign(K[r],list(i=0,n=0,m=0),env=code)
+   L <- get(K[r],env=code,inherits=FALSE)
+   if(L$n < F[r]){L$i <- r; L$m <- L$n; L$n <- F[r]; assign(K[r],L,env=code)  # new largest value for the key: the old largest moves to m
+   }else if(L$m < F[r]){L$m <- F[r]; assign(K[r],L,env=code)}  # not above n but above m: m becomes the second-largest value (a tie with n counts)
+ }
>  
> nr <- nj+1
> L$i <- nr; L$n <- L$m <- 0; assign("sentinel",L,env=code)  # entry for the trailing "sentinel" element of Ko
> Fo <- F[k]  # F values in the same key order as Ko, Ino, K2o and To
> 
> jour <- file("journalK200.csv","w")
> key <- "****"; list <- FALSE; OK <- FALSE
> for(r in 1:nr){
+   L <- get(Ko[r],env=code,inherits=FALSE)
+   if(Ko[r]==key){  # row r has the same key as row r-1
+     if(OK){list <- TRUE
+       if(Fo[r-1]>2) cat(r-1,Ino[r-1],Fo[r-1],K2o[r-1],To[r-1],"\n",sep="|",file=jour)  # only rows with a value above 2 are written
+     }
+   } else {  # key changes at row r, so row r-1 was the last row of the previous run
+     OK <- (L$n >= 200)&&(L$m > 2)  # the run starting at row r qualifies if its largest value is >= 200 and its second-largest is > 2
+     if(list&&(Fo[r-1]>2)) cat(r-1,Ino[r-1],Fo[r-1],K2o[r-1],To[r-1],"\n",sep="|",file=jour)  # list still refers to the previous run here
+     list <- FALSE; key <- Ko[r]}
+ }
> close(jour)

We get the list journalK200.csv with 2600 titles, or the list journalK100.csv with 3714 titles to be inspected.

...
4509|1300860|12|AMJPHAN|AMER J PHYS ANTHROPOL|
4525|1297320|7512|AMJPUHE|AM J PUBLIC HEALTH|
4526|1300835|483|AMJPUHE|AMER J PUBLIC HEALTH|
4551|1297308|2179|AMJPRME|AM J PREV MED|
4552|1299777|46|AMJPRME|AMER J PREV MED|
4571|1297423|2571|AMJPOSC|AM J POLIT SCI|
4572|1298223|235|AMJPOSC|AMER J POLIT SCI|
4622|1297164|168906|AMJSO|AM J SOCIOL|
4624|1301542|14045|AMJSO|AMER J SOCIOL|
4625|1307074|4522|AMJSO|AM J SOCIOLOGY|
4626|1310579|15|AMJSO|AM J SOC|
4778|1298989|236|ARLI|ARTIF LIFE|
4779|1309540|20|ARLI|ARTIFICIAL LIFE|
5028|1312503|611|ACMAAN|ACAD MANAG ANN|
5029|1313000|17|ACMAAN|ACAD MANAGEMENT ANN|
...

The lists are available in journalEq.zip.

To prepare data for equivalence partition of journals from a selected list:

remove lines that have no other equivalent line in the list;
group all lines belonging to the same class together;
put the canonical line of the class as the first line of the group of lines with the same code;
if several classes exist for the same code, extend the code by the numbers 1, 2, 3, … to get a unique code for each class;
indicate problematic lines with ??? at their beginning;
some journal names can be abbreviated - for example:
- CACM = Communications of the Association for Computing Machinery
- JACM = J ACM = Journal of the Association for Computing Machinery
- JASA = Journal of the American Statistical Association
- LNCS, NIPS, JASSS, IJCAI, BMJ, JOSS, …

Abbreviated names

> which(T=="JASA")
[1] 14041
> F[14041]
[1] 4
> which(T=="CACM")
[1] 12756
> F[12756]
[1] 61

A list of frequent journal names of length at most 5:

> tl <- nchar(T)
> Ts <- T[tl<6]; Fs <- F[tl<6]; length(Ts)
[1] 1278
> os <- rev(order(Fs))
> Tso <- Ts[os]; Fso <- Fs[os]
> S <- data.frame(Tso,Fso)
> S[1:120,]
      Tso     Fso           Tso Fso           Tso Fso           Tso Fso
1   ***** 1495317     31   ICML  96     61    EMU  36     91   ERDE  20
2    AIDS    2525     32   NIPS  96     62  IJCAI  36     92    BMJ  20
3   J ACM    1617     33    SDM  92     63    R J  35     93  HOMME  20
4    LNCS    1129     34  LIBRI  91     64   TEXT  34     94  GAMES  19
5   J DOC     830     35  ETHOS  86     65   MIND  32     95  PEERJ  19
6   MIS Q     359     36    KDD  80     66  FORUM  31     96   TKDD  19
7   OIKOS     358     37     PS  75     67  FOCUS  31     97  THE J  19
8     MAN     354     38   ORYX  69     68  JOGNN  31     98  DRUGS  19
9    PAIN     329     39   ISIS  65     69  INT J  30     99    WMJ  19
10  ICWSM     307     40  WIRED  64     70  Z SOZ  30     100 QUEUE  18
11   AREA     307     41   CACM  61     71  LIB J  29     101 ED EC  18
12   CELL     303     42   PNAS  58     72   TIME  29     102  JOSS  17
13    WWW     280     43  OMEGA  58     73    ZER  28     103  OSDI  17
14  T GIS     265     44  HEART  53     74  REDES  28     104   BIT  17
15  BRAIN     232     45   VINE  50     75  P IRE  28     105  JACM  17
16  SLEEP     208     46   INFO  47     76   TWEB  27     106  ACME  16
17  AMBIO     203     47  FAM J  45     77  ALOMA  27     107  GAIA  16
18  SIGNS     183     48  QUEST  45     78  HUMOR  27     108  TKDE  16
19   KIVA     170     49   EURE  44     79  POLIS  27     109 ARDEA  16
20  JAIDS     162     50   CITY  44     80  ICONO  26     110  CORR  15
21  PVLDB     159     51   WORD  44     81    ACL  25     111 PRISM  15
22    AUK     158     52    GUT  43     82   PMLA  25     112 GENUS  15
23  J BUS     156     53  VET J  42     83   NSDI  24     113 J AIS  15
24   IBIS     149     54   META  41     84  SUCHT  23     114 PRINT  15
25  YOUNG     148     55  BLOOD  41     85  ARBOR  23     115  NOUS  15
26  CHEST     140     56  TELOS  41     86    P T  22     116 SIGIR  14
27  SPINE     135     57  AGING  41     87  EJBRM  21     117  EC B  14
28  CHAOS     118     58   VLDB  40     88  PALEO  21     118  EDBT  14
29   AAAI     111     59  JASSS  38     89   GENE  21     119    RQ  14
30   WORK     103     60  ELT J  37     90   BONE  20     120 ZYGON  14

Creating Pajek partition

We manually edited the filter file. Using the following program in R (replace test.csv with journalK100final.csv) we create the corresponding Pajek partition:

> wdir <- "C:/Users/batagelj/work/Python/WoS/SocNet/2018/WoS"
> setwd(wdir)
> jt <- 1367558; jf <- 1297133
> nj <- jt-jf  # number of entries in the partition vector built further down
> D <- read.table("test.csv",sep="|",stringsAsFactors=FALSE)  # "|"-separated file; columns come back as V1..V6, with V6 printing as NA in head(D)
> head(D)
    V1      V2   V3     V4                 V5 V6
1 1631 1298694 1057   AMAN       AM ANTHROPOL NA
2 1633 1300624    3   AMAN     AMER ANTHROPOL NA
...
> D <- D[,1:5]  # keep the first five columns, dropping V6
> names(D) <- c("i","node","f","code","name")  # node and code are the columns used to build the partition
> head(D)
     i    node    f   code               name
1 1631 1298694 1057   AMAN       AM ANTHROPOL
2 1633 1300624    3   AMAN     AMER ANTHROPOL
3 2164 1297551  834 ANBEME      ANN BEHAV MED
4 2165 1304172   43 ANBEME ANN BEHAVIORAL MED
5 2225 1297290 2501 AMBESC       AM BEHAV SCI
6 2226 1300715  356 AMBESC     AMER BEHAV SCI
> 
> jouC <- file("journals.clu","w")
> clu <- 1:nj; key <- "****"  # initially every position is its own class
> for(r in 1:nrow(D)) if(D$code[r]==key) clu[D$node[r]-jf] <- jClass else  # same code as the preceding row: put this node into that run's class
+   {key <- D$code[r]; jClass <- D$node[r] - jf}  # first row of a run of equal codes: its node - jf becomes the class number
> i1 <- 1298694-jf; i2 <- 1300624-jf  # spot check with the node numbers of the two AMAN rows shown by head(D)
> i1
[1] 1561
> i2
[1] 3491
> clu[i2]
[1] 1561
> cat(paste("*vertices ",nj,sep=""),clu,sep="\n",file=jouC)  # "*vertices nj" header line followed by one class number per line
> close(jouC)

List of all journals

> jt <- 1367558; jf <- 1297133
> nj <- jt-jf
> A <- read.table("WJnc.txt",skip=jf+1,nrows=nj,stringsAsFactors=FALSE)  # read nj rows after skipping the first jf+1 lines
> V <- A$V1; T <- A$V2
> firstCh <- function(x) ifelse(nchar(x)>0,substr(x,1,1),"€")  # first character; "€" stands in for an empty string
> twoCh <- function(x) substr(x,1,2)
> firstChs <- function(x) paste(firstCh(strsplit(x," ")[[1]]),collapse="")  # initials of the space-separated words of x
> twoChs <- function(x) paste(twoCh(strsplit(x," ")[[1]]),collapse="")  # first two characters of each space-separated word of x
> K1 <- sapply(T,firstChs); K2 <- sapply(T,twoChs)
> J <- data.frame(i=1:length(T),V,one=K1,two=K2)
> K <- paste(K1,K2,sep=":"); k <- order(K)  # k sorts the rows by the combined key "K1:K2"
> K2o <- as.character(K2[k]); Ino <- V[k]; To <- as.character(T[k]); Ko <- K[k]  # no sentinel element is appended in this version
> F <- read.table("wIndegJ.vec",skip=1,stringsAsFactors=FALSE)$V1
> Fo <- F[k]
> jlst <- file("journalList.csv","w")
> for(r in 1:nj) cat(r,Ino[r],Ino[r]-jf,Fo[r],K2o[r],To[r],"\n",sep="|",file=jlst)  # one "|"-separated line per row, in key order, with no filtering
> close(jlst)