October 25, 2018
We decided to manually inspect all journals with at least one of their names cited at least 200 times.
In Pajek we computed the 2-mode network Cite*WJnc
and determined the vector wIndegJ.vec
with weighted indegrees for journals. We produced the list of candidates for inspection with the following program code in R:
# List candidate groups of journal names for manual inspection.
#
# Every journal name gets a key "K1:K2": K1 joins the first character and K2
# the first two characters of each space-separated word of the name. Names are
# sorted by key. A run of two or more names sharing a key is written to
# outFile when the largest F value inside that run is at least minTop.
# Each output line holds: position in the sorted order, V value, K1, K2, name.

wdir <- "C:/Users/batagelj/work/Python/WoS/SocNet/2018/WoS"
setwd(wdir)

# Settings. Change these to produce another list (e.g. minTop <- 500).
minTop  <- 200                 # required F value of the top name in a key group
outFile <- "journalKord.txt"   # file receiving the candidate list

jt <- 1367558; jf <- 1297133
nj <- jt - jf                  # number of rows read from WJnc.txt

# Skip the first jf+1 lines of WJnc.txt and read the following nj rows.
A <- read.table("WJnc.txt", skip = jf + 1, nrows = nj, stringsAsFactors = FALSE)
# NOTE: T and F hide R's TRUE/FALSE shorthands; the names are kept because
# later steps refer to them. Always spell out TRUE/FALSE in this session.
V <- A$V1; T <- A$V2

# Key construction. An empty word (e.g. between two consecutive spaces)
# contributes "€" to K1 and "" to K2.
firstCh  <- function(x) ifelse(nchar(x) > 0, substr(x, 1, 1), "€")
twoCh    <- function(x) substr(x, 1, 2)
firstChs <- function(x) paste(firstCh(strsplit(x, " ")[[1]]), collapse = "")
twoChs   <- function(x) paste(twoCh(strsplit(x, " ")[[1]]), collapse = "")
K1 <- sapply(T, firstChs); K2 <- sapply(T, twoChs)
J <- data.frame(i = 1:length(T), V, one = K1, two = K2)

# Sort by key. "sentinel" is appended so that the output loop sees one final
# key change and can finish the last run.
K <- paste(K1, K2, sep = ":"); k <- order(K)
K1o <- c(as.vector(K1[k]), "sentinel"); K2o <- as.character(K2[k])
Ino <- V[k]; To <- as.character(T[k]); Ko <- c(K[k], "sentinel")
nr <- nj + 1

# One value per journal name; the first line of the file is skipped.
F <- read.table("wIndegJ.vec", skip = 1, stringsAsFactors = FALSE)$V1

# For every key remember the largest F value (n) and the row holding it (i).
code <- new.env(hash = TRUE, parent = emptyenv())
for (r in seq_len(nj)) {
  if (!exists(K[r], envir = code, inherits = FALSE)) {
    assign(K[r], list(i = 0, n = 0), envir = code)
  }
  L <- get(K[r], envir = code, inherits = FALSE)
  if (L$n < F[r]) {
    L$i <- r; L$n <- F[r]
    assign(K[r], L, envir = code)
  }
}
assign("sentinel", list(i = nr, n = 0), envir = code)

jour <- file(outFile, "w")
# Write the row at position p of the sorted order.
writeRow <- function(p) {
  cat(p, Ino[p], K1o[p], K2o[p], To[p], "\n", file = jour)
}

# Row r-1 is written while looking at row r: if both share a key, row r-1 is
# inside a run; when the key changes, row r-1 was the last row of its run and
# is written only if the run was being listed.
key <- "****"; listing <- FALSE
for (r in seq_len(nr)) {
  L <- get(Ko[r], envir = code, inherits = FALSE)
  if (Ko[r] == key) {
    if (L$n >= minTop) {
      writeRow(r - 1)
      listing <- TRUE
    }
  } else {
    if (listing) writeRow(r - 1)
    listing <- FALSE
    key <- Ko[r]
  }
}
close(jour)
We obtained a list with 5482 lines.
1105 1297475 A AD ADDICTION 1106 1297607 A AD ADOLESCENCE 1107 1300863 A AD ADDICTION.S 1108 1304403 A AD ADVANCES 1109 1307263 A AD ADDICTIONS 1110 1310173 A AD ADVOCATE ... 3922 1297535 AJA AMJAD AM J ADDICTION 3923 1300898 AJA AMJAD AMER J ADDICTION 4071 1297402 AJCP AMJCOPS AM J COMMUN PSYCHOL 4072 1297538 AJCP AMJCOPS AMER J COMMUN PSYCHOL 4073 1308824 AJCP AMJCOPS AM J COMMUNITY PSYCO 4074 1309864 AJCP AMJCOPS AM J COMMON PSYCHOL 4075 1309882 AJCP AMJCOPS AMER J COMMUNITY PSY 4076 1310488 AJCP AMJCOPS AM J COMMUNITY PSYCH 4077 1310569 AJCP AMJCOPS AM J COMM PSYCHOL 4078 1356083 AJCP AMJCOPS AM J COMMUNITY PSYCHOL 4093 1297553 AJDAA AMJDRALAB AM J DRUG ALCOHOL AB 4094 1300210 AJDAA AMJDRALAB AMER J DRUG ALCOHOL ABUSE ... 69505 1312465 WPC WIPECO WIRELESS PERS COMMUN 69506 1358467 WPC WIPECO WIRELESS PERSONAL COMMUNICATIONS 69975 1299724 YS YOSO YOUTH SOC 69976 1307291 YS YOSO YOUTH SOCIALIZATION
If we change the threshold from 200 to 500 we get a shorter list JournalK500.txt
with 3148 lines.
October 27, 2018
To additionally reduce the number of titles to inspect, we decided to consider only titles that appeared in at least 3 citations.
# Candidate list with an additional per-title filter.
#
# Requires objects created by the candidate-list script above: K, F, nj, k,
# Ko (including its trailing "sentinel" entry), Ino, K2o and To.
#
# A key group is listed when its largest F value is at least minTop and its
# second largest F value is at least minCit; inside such a group only titles
# with F of at least minCit are written. Each output line holds position in
# the sorted order, V value, F value, K2 and name, every field followed by "|".

# Settings. minTop <- 100 gives the file journalK100.csv.
minTop  <- 200                                # F value required of the top title
minCit  <- 3                                  # F value required of a listed title
outFile <- paste0("journalK", minTop, ".csv")

# For every key remember the largest (n) and second largest (m) F value and
# the row (i) holding the largest one.
code <- new.env(hash = TRUE, parent = emptyenv())
for (r in seq_len(nj)) {
  if (!exists(K[r], envir = code, inherits = FALSE)) {
    assign(K[r], list(i = 0, n = 0, m = 0), envir = code)
  }
  L <- get(K[r], envir = code, inherits = FALSE)
  if (L$n < F[r]) {
    # New maximum: the previous maximum becomes the runner-up.
    L$i <- r; L$m <- L$n; L$n <- F[r]
    assign(K[r], L, envir = code)
  } else if (L$m < F[r]) {
    L$m <- F[r]
    assign(K[r], L, envir = code)
  }
}

nr <- nj + 1
assign("sentinel", list(i = nr, n = 0, m = 0), envir = code)
Fo <- F[k]                                    # F in the sorted order

jour <- file(outFile, "w")
# Write the row at position p of the sorted order.
writeRow <- function(p) {
  cat(p, Ino[p], Fo[p], K2o[p], To[p], "\n", sep = "|", file = jour)
}

# Row r-1 is handled while looking at row r. OK is decided once per group,
# when its first row is seen. For whole-number counts `>= minCit` with
# minCit = 3 selects the same rows as `> 2`.
key <- "****"; listing <- FALSE; OK <- FALSE
for (r in seq_len(nr)) {
  L <- get(Ko[r], envir = code, inherits = FALSE)
  if (Ko[r] == key) {
    if (OK) {
      listing <- TRUE
      if (Fo[r - 1] >= minCit) writeRow(r - 1)
    }
  } else {
    OK <- (L$n >= minTop) && (L$m >= minCit)
    if (listing && (Fo[r - 1] >= minCit)) writeRow(r - 1)
    listing <- FALSE
    key <- Ko[r]
  }
}
close(jour)
We get the list journalK200.csv
with 2600 titles, or the list journalK100.csv
with 3714 titles to be inspected.
... 4509|1300860|12|AMJPHAN|AMER J PHYS ANTHROPOL| 4525|1297320|7512|AMJPUHE|AM J PUBLIC HEALTH| 4526|1300835|483|AMJPUHE|AMER J PUBLIC HEALTH| 4551|1297308|2179|AMJPRME|AM J PREV MED| 4552|1299777|46|AMJPRME|AMER J PREV MED| 4571|1297423|2571|AMJPOSC|AM J POLIT SCI| 4572|1298223|235|AMJPOSC|AMER J POLIT SCI| 4622|1297164|168906|AMJSO|AM J SOCIOL| 4624|1301542|14045|AMJSO|AMER J SOCIOL| 4625|1307074|4522|AMJSO|AM J SOCIOLOGY| 4626|1310579|15|AMJSO|AM J SOC| 4778|1298989|236|ARLI|ARTIF LIFE| 4779|1309540|20|ARLI|ARTIFICIAL LIFE| 5028|1312503|611|ACMAAN|ACAD MANAG ANN| 5029|1313000|17|ACMAAN|ACAD MANAGEMENT ANN| ...
The lists are available in journalEq.zip.
To prepare data for equivalence partition of journals from a selected list:
# Spot checks: locate an exact journal name in T and read the entry of F
# at the same position.
> which(T=="JASA")
[1] 14041
> F[14041]
[1] 4
> which(T=="CACM")
[1] 12756
> F[12756]
[1] 61
A list of frequent journal names of length at most 5:
# Keep the names of at most 5 characters (nchar < 6) together with their F
# values, order them by decreasing F and show the first 120.
# The listing below is arranged in four column groups of 30 rows each.
> tl <- nchar(T)
> Ts <- T[tl<6]; Fs <- F[tl<6]; length(Ts)
[1] 1278
> os <- rev(order(Fs))
> Tso <- Ts[os]; Fso <- Fs[os]
> S <- data.frame(Tso,Fso)
> S[1:120,]
     Tso     Fso       Tso Fso       Tso Fso       Tso Fso
1  ***** 1495317    31 ICML  96    61 EMU   36    91 ERDE  20
2  AIDS     2525    32 NIPS  96    62 IJCAI 36    92 BMJ   20
3  J ACM    1617    33 SDM   92    63 R J   35    93 HOMME 20
4  LNCS     1129    34 LIBRI 91    64 TEXT  34    94 GAMES 19
5  J DOC     830    35 ETHOS 86    65 MIND  32    95 PEERJ 19
6  MIS Q     359    36 KDD   80    66 FORUM 31    96 TKDD  19
7  OIKOS     358    37 PS    75    67 FOCUS 31    97 THE J 19
8  MAN       354    38 ORYX  69    68 JOGNN 31    98 DRUGS 19
9  PAIN      329    39 ISIS  65    69 INT J 30    99 WMJ   19
10 ICWSM     307    40 WIRED 64    70 Z SOZ 30   100 QUEUE 18
11 AREA      307    41 CACM  61    71 LIB J 29   101 ED EC 18
12 CELL      303    42 PNAS  58    72 TIME  29   102 JOSS  17
13 WWW       280    43 OMEGA 58    73 ZER   28   103 OSDI  17
14 T GIS     265    44 HEART 53    74 REDES 28   104 BIT   17
15 BRAIN     232    45 VINE  50    75 P IRE 28   105 JACM  17
16 SLEEP     208    46 INFO  47    76 TWEB  27   106 ACME  16
17 AMBIO     203    47 FAM J 45    77 ALOMA 27   107 GAIA  16
18 SIGNS     183    48 QUEST 45    78 HUMOR 27   108 TKDE  16
19 KIVA      170    49 EURE  44    79 POLIS 27   109 ARDEA 16
20 JAIDS     162    50 CITY  44    80 ICONO 26   110 CORR  15
21 PVLDB     159    51 WORD  44    81 ACL   25   111 PRISM 15
22 AUK       158    52 GUT   43    82 PMLA  25   112 GENUS 15
23 J BUS     156    53 VET J 42    83 NSDI  24   113 J AIS 15
24 IBIS      149    54 META  41    84 SUCHT 23   114 PRINT 15
25 YOUNG     148    55 BLOOD 41    85 ARBOR 23   115 NOUS  15
26 CHEST     140    56 TELOS 41    86 P T   22   116 SIGIR 14
27 SPINE     135    57 AGING 41    87 EJBRM 21   117 EC B  14
28 CHAOS     118    58 VLDB  40    88 PALEO 21   118 EDBT  14
29 AAAI      111    59 JASSS 38    89 GENE  21   119 RQ    14
30 WORK      103    60 ELT J 37    90 BONE  20   120 ZYGON 14
We manually edited the filter file. Using the following program in R (replace test.csv
with journalK100final.csv
) we create the corresponding Pajek partition:
# Build a Pajek partition (journals.clu) from the manually edited filter file.
# Consecutive rows of the filter file with the same code form one class; the
# class number is the index (node - jf) of the first row of the group.
> wdir <- "C:/Users/batagelj/work/Python/WoS/SocNet/2018/WoS"
> setwd(wdir)
> jt <- 1367558; jf <- 1297133
> nj <- jt-jf
# Read the "|"-separated filter file. The sixth column is NA in the rows
# shown (presumably caused by a trailing "|" on each line - confirm against
# the file) and is dropped below.
> D <- read.table("test.csv",sep="|",stringsAsFactors=FALSE)
> head(D)
    V1      V2   V3   V4             V5 V6
1 1631 1298694 1057 AMAN   AM ANTHROPOL NA
2 1633 1300624    3 AMAN AMER ANTHROPOL NA
...
> D <- D[,1:5]
> names(D) <- c("i","node","f","code","name")
> head(D)
     i    node    f   code               name
1 1631 1298694 1057   AMAN       AM ANTHROPOL
2 1633 1300624    3   AMAN     AMER ANTHROPOL
3 2164 1297551  834 ANBEME      ANN BEHAV MED
4 2165 1304172   43 ANBEME ANN BEHAVIORAL MED
5 2225 1297290 2501 AMBESC       AM BEHAV SCI
6 2226 1300715  356 AMBESC      AMER BEHAV SCI
>
> jouC <- file("journals.clu","w")
# Start with every journal in its own class (clu[j] == j). Scanning D in
# order, a row whose code differs from the previous one opens a new class
# numbered node - jf; every following row with the same code joins it.
> clu <- 1:nj; key <- "****"
> for(r in 1:nrow(D)) if(D$code[r]==key) clu[D$node[r]-jf] <- jClass else
+ {key <- D$code[r]; jClass <- D$node[r] - jf}
# Check on the first group: node 1300624 (AMER ANTHROPOL) must carry the
# class of node 1298694 (AM ANTHROPOL).
> i1 <- 1298694-jf; i2 <- 1300624-jf
> i1
[1] 1561
> i2
[1] 3491
> clu[i2]
[1] 1561
# Write a "*vertices nj" header line followed by one class number per line.
> cat(paste("*vertices ",nj,sep=""),clu,sep="\n",file=jouC)
> close(jouC)
# Export every journal name, sorted by its abbreviation key, to
# journalList.csv. Each line holds: position in the sorted order, V value,
# V value minus jf, F value, K2 and the name, every field followed by "|".

jt <- 1367558
jf <- 1297133
nj <- jt - jf

# Skip the first jf+1 lines of WJnc.txt and read the following nj rows.
A <- read.table("WJnc.txt", skip = jf + 1, nrows = nj, stringsAsFactors = FALSE)
# NOTE: T and F hide R's TRUE/FALSE shorthands; spell out TRUE/FALSE.
V <- A$V1
T <- A$V2

# First character of a word; an empty word yields "€".
firstCh <- function(x) {
  ifelse(nchar(x) > 0, substr(x, 1, 1), "€")
}

# First two characters of a word.
twoCh <- function(x) {
  substr(x, 1, 2)
}

# Join the first characters of all space-separated words of one name.
firstChs <- function(x) {
  words <- strsplit(x, " ")[[1]]
  paste(firstCh(words), collapse = "")
}

# Join the first two characters of all space-separated words of one name.
twoChs <- function(x) {
  words <- strsplit(x, " ")[[1]]
  paste(twoCh(words), collapse = "")
}

K1 <- sapply(T, firstChs)
K2 <- sapply(T, twoChs)
J <- data.frame(i = 1:length(T), V, one = K1, two = K2)

# Sort everything by the combined key "K1:K2".
K <- paste(K1, K2, sep = ":")
k <- order(K)
K2o <- as.character(K2[k])
Ino <- V[k]
To <- as.character(T[k])
Ko <- K[k]

# One value per journal name; the first line of the file is skipped.
F <- read.table("wIndegJ.vec", skip = 1, stringsAsFactors = FALSE)$V1
Fo <- F[k]

jlst <- file("journalList.csv", "w")
for (r in seq_len(nj)) {
  cat(r, Ino[r], Ino[r] - jf, Fo[r], K2o[r], To[r], "\n", sep = "|", file = jlst)
}
close(jlst)