Citation network

C:\Users\batagelj\work\Python\WoS\daria\Excel
C:\Users\batagelj\Documents\2017\malceva\elib

Importing data

3. October 2017

Basic works

Most of the data about 5227 basic works are available in the file Articles.xls. The table has 25 columns

 1 A ID                          14 N Page - Last
 2 B Article                     15 O Citing - RSCI
 3 C Link eLibrary               16 P Citing - Web of Science
 4 D ID - 1`st author            17 Q Citing - Scopus
 5 E 1`st author                 18 R Key words
 6 F Financial support           19 S Key words (normalized)
 7 G Type                        20 T Abstract
 8 H Language                    21 U Abstract (normalized)
 9 I ID - Journal                22 V Abstract - English
10 J Journal                     23 W Abstract - English (normalized)
11 K Volume                      24 X Size (in signs)
12 L Year                        25 Y Discipline
13 M Page - first

From this table we can get all components needed in the ISI name of a work

AU + ’, ’ + PY + ’, ’ + SO[:20] + ’, V’ + VL + ’, P’ + BP

and in the short name

LastNm[:8] + ’ ’ + FirstNm[0] + ’(’ + PY + ’)’ + VL + ’:’ + BP

In R there are several packages for importing Excel tables (XLConnect, xlsx, gdata, Readxl, etc.). We will use the package readxl (description).

> library(readxl)
> aFile <- "C:/Users/batagelj/Documents/2017/malceva/elib/Articles.xlsx"
> atypes <- c("text","text","skip","skip","text",rep("skip",4),rep("text",5),rep("skip",11))
> atypes
 [1] "text" "text" "skip" "skip" "text" "skip" "skip" "skip" "skip" "text" "text" "text" "text" "text" 
     "skip" "skip" "skip" "skip" "skip" "skip" "skip" "skip" "skip" "skip" "skip"
> af <- read_excel(aFile,sheet=1,col_types=atypes)
> dim(af)
[1] 5227    8
> nb <- nrow(af)
> nb
[1] 5227
> short <- vector(mode="character",length=nb)
> Last <- vector(mode="character",length=nb)
> Init <- vector(mode="character",length=nb)
> S <- strsplit(af[[3]]," ") 
> S[1]
[[1]]
[1] "ЗЫРЯНОВ"     "СЕРГЕЙ"      "ГРИГОРЬЕВИЧ"
> for(i in 1:nb) {Last[i] <- substr(S[[i]][1],1,8); Init[i] <- substr(S[[i]][2],1,1)} 
> for(i in 1:nb) short[i] <- paste(Last[i],"_",Init[i],"(",af[i,6],")",af[i,5],":",af[i,7],sep="")
> head(short)
[1] "ЗЫРЯНОВ_С(2008)4:24"   "ДАВЫДЕНК_В(2007)3:74"  "FARMER_J(1999)1:49"    "NA_NA(2006)12:1687"    
    "PISELLI_F(2007)7:867"  "KOKSAL_Y(2008)10:1498"
> isi <- vector(mode="character",length=nb)
> for(i in 1:nb) isi[i] <- paste(af[i,3],", ",af[i,6],", ",substr(af[i,4],1,20),", V",af[i,5],", P",af[i,7],sep="") 
> head(isi)
[1] "ЗЫРЯНОВ СЕРГЕЙ ГРИГОРЬЕВИЧ, 2008, ВЕСТНИК ТАМБОВСКОГО , V4, P24"       
[2] "ДАВЫДЕНКО ВЛАДИМИР АЛЕКСАНДРОВИЧ, 2007, ИЗВЕСТИЯ УРАЛЬСКОГО , V3, P74"
[3] "FARMER J., 1999, INTERNATIONAL JOURNA, V1, P49"                        
[4] "NA, 2006, AMERICAN BEHAVIORAL , V12, P1687"                           
[5] "PISELLI FORTUNATA, 2007, AMERICAN BEHAVIORAL , V7, P867"               
[6] "KOKSAL YONCA, 2008, AMERICAN BEHAVIORAL , V10, P1498"                 

Coauthors

> waFile <- "C:/Users/batagelj/Documents/2017/malceva/elib/WA.net"
> N <- as.vector(read.table(waFile,sep=" ",skip=4,nrows=9207,stringsAsFactors=FALSE)$V2)
> Encoding(N) <- "UTF-8"
> links <- read.table(waFile,sep=" ",skip=9212,col.names=c("u","v"))
> head(N)
[1] "ЗЫРЯНОВ_С(2008)4:24"   "ДАВЫДЕНК_В(2007)3:74"  "FARMER_J(1999)1:49"   
[4] "NA_NA(2006)12:1687"    "PISELLI_F(2007)7:867"  "KOKSAL_Y(2008)10:1498"
> head(links)
  u    v
1 1 7146
2 2 7227
3 2 7056
4 2 7650
5 5 5228
6 6 5229
> N[7146]
[1] "ЗЫРЯНОВ СЕРГЕЙ ГРИГОРЬЕВИЧ"

Citations from basic works

For a set of basic works B the corresponding citations were collected in the file cite_papers.xls. It has the structure

n  literature_cited_by_paper                    paper                                    ID        Ссылка              _merge
0  Alexa Top 500 Global Sites. -URL: http://ww❥ БЛОГИ, МИКРОБЛОГИ И СОЦИАЛЬНЫЕ СЕТИ: ИН❥ 16519995  https://elibrary.r❥ both
1  Ezhe. -URL: http://ezhe.ru/POTOP/index.2010❥ БЛОГИ, МИКРОБЛОГИ И СОЦИАЛЬНЫЕ СЕТИ: ИН❥ 16519995  https://elibrary.r❥ both
2  Кастельс М. Информационная эпоха: экономика❥ БЛОГИ, МИКРОБЛОГИ И СОЦИАЛЬНЫЕ СЕТИ: ИН❥ 16519995  https://elibrary.r❥ both
...
19 Кулюткин Ю. Н., Тарасов С. В. Образовательн❥ ОСОБЕННОСТИ ОБРАЗОВАТЕЛЬНОЙ СРЕДЫ В СЕТ❥ 23731867  https://elibrary.r❥ both
...

The character ❥ indicates “missing part of a string”.

There were some errors in the original data file (around line 42681 and line 42776). I manually corrected them. The corrected data were saved on file cited.xls.

> library(readxl)
> wdir <- "C:/Users/batagelj/work/Python/WoS/daria/Excel"
> setwd(wdir)
> excel_sheets("cited.xls")
[1] "Лист1"       "cite_papers"
> types <- c("numeric","text","skip","text","skip","skip")
> df <- read_excel("cited.xls",sheet=1,col_types=types,n_max=100)
> dim(df)
[1] 100   3
> S <- as.vector(unlist(df[,2]))
> Encoding(S) <- "UTF-8"
> length(S)
[1] 100
> head(S)
[1] "Alexa Top 500 Global Sites. -URL: http://www.alexa.com/topsites; http://www.alexa.com/topsites/countries/RU"
[2] "Ezhe. -URL: http://ezhe.ru/POTOP/index.2010.html"                                                           
[3] "Кастельс М. Информационная эпоха: экономика, общество и культура. М., 2000. С. 124."                        
[4] "Черных А. Социология массовых коммуникаций. М., 2008."                                                      
[5] "Назарчук А.В. Теория коммуникации в современной философии. М., 2009. С. 254."                               
[6] "Верховская А.И. Письма в редакцию и читатель. М., 1972."                                                     
> df <- read_excel("cited.xls",sheet=1,col_types=types)
> dim(df)
[1] 65529     3

Converting to WoS

Basic works to WoS

Dictionaries in R

wos <- file("basic.wos","w"); cit <- file("cited.wos","w")
skip <- file("skiped.txt","w"); cat(date(),"\n")
write(paste("**\n** Conversion of elibrary.ru data into WoS format\n** ",
   date(),"\n*T 1\nFN eLib2WoS\nVR 1.0\n",sep=""),wos)
noref <- 0; k <-1; nl <- nrow(links); ni <- 0; ns <- 0
for(i in 1:nb){
   write(paste("PT J",sep=""),wos)
   if(k<=nl){found <- FALSE
      if(links$u[k]==i){found <- TRUE
         writeLines(paste("AU ",N[links$v[k]],sep=""),wos,useBytes=T) 
         repeat{ 
            k <- k+1
            if(k>nl) break
            if(links$u[k]>i) break
            writeLines(paste("   ",N[links$v[k]],sep=""),wos,useBytes=T)
         }       
      }
   }
   if(!found && !is.na(af[[3]][i]))writeLines(paste("AU ",af[[3]][i],sep=""),wos,useBytes=T)
   writeLines(paste("TI ",af[[2]][i],sep=""),wos,useBytes=T)
   writeLines(paste("SO ",af[[4]][i],sep=""),wos,useBytes=T)
   cid <- af[[1]][i]
   if(exists(cid,env=iDic,inherits=FALSE)){
      code <- "CR "
      ij <- as.vector(unlist(get(cid,env=iDic,inherits=FALSE)))
      for(j in ij[1]:ij[2]){
         D <- ISIname(S[j])
         if(D$py==0){ ns <- ns+1
            writeLines(paste(i,j,S[j]),skip,useBytes=T)
         } 
         writeLines(paste(code,D$au,", ",D$py,", ",substr(D$so,1,20),
            ", V",D$vl,", P",D$bp,sep=""),wos,useBytes=T)
#         writeLines(paste("   ",S[j],sep=""),wos,useBytes=T)
         code <- "   "; ni <- ni+1
      }
   } else {
      if((noref %% 10)==0) cat("\n")
      noref <- noref+1; cat(" ",cid)
   }
   write(paste("PY ",af[[6]][i],sep=""),wos)
   write(paste("VL ",af[[5]][i],sep=""),wos)
#  write(paste("IS ",af[[2]][i],sep=""),wos)
   write(paste("BP ",af[[7]][i],sep=""),wos)
   write(paste("EP ",af[[8]][i],sep=""),wos)
   write(paste("UT ELIB:",cid,sep=""),wos)
   writeLines(paste("UT short:",N[i],sep=""),wos,useBytes=T)
   write(paste("ER \n",sep=""),wos)
}
cat("\nnc=",nc,"  ni=",ni,"  ns=",ns,"\n",date(),"\n")
close(wos); close(cit); close(skip)

References' elements

References follow many different formats:

 [1] "Левин М.И., Цирик М.Л. Математическое моделирование коррупции//Экономика и математические методы. 1998. Т. 34. Вып. 4."                                                                                                                                                
 [2] "Fleurbaey M. Beyond GDP: The Quest for a Measure of Social Welfare. Journal of Economic Literature, 2009, vol. 47, no. 4, pp. 1029-1046."                                                                                                                              
 [3] "Cooren, F. Acting and organizing: How speech acts structure organizational interactions. Concepts and Transformation, 2001a, 6(3), 275–293."                                                                                                                           
 [4] "Bian, Y. Bringing strong ties back in: Indirect ties, network bridges, and job searches in China. American Sociological Review, 1997, 62, 366–385."                                                                                                                    
 [5] "Denis, J.-L., Langley, A. & Cazale, L.   Leadership and strategic change under ambiguity. Organization Studies, 1996, 17, 673-699."                                                                                                                                    
 [6] "Ingram, Paul, Jeffrey Robinson, and Marc L. Busch. 2005. The intergovernmental network of world trade: IGO connectedness, governance and embeddedness. American Journal of Sociology 111 (3): 824-58."                                                                 
 [7] "Leeds, Brett Ashley, Andrew G. Long, and Sara McLaughlin Mitchell. 2000. Reevaluating alliance reliability: Specific threats, specific promises. Journal of Conflict Resolution 44 (5): 686-99."                                                                       
 [8] "W. Chen, Y. Wang, and S. Yang, \"Efficient influence maximization in social networks,\" in KDD 2009."                                                                                                                                                                  
 [9] "Alexander Fischer. Blog. URL: http://blog.alexander-fischer.org/kategorie/persoenlich/(дата обращения 04.02.2012)."                                                                                                                                                    
[10] "Холмогорова, Н.Г. Гаранян, Г.А. Петрова//Социальная и клиническая психиатрия. -2003. -Т. 13. -№ 2. -С. 15-24."                                                                                                                                                         
[11] "Snijders, T.A.B. & Bosker, R.J. (1994) `Modeled Variance in Two-level Models\", Sociological Methods and Research 22: 342-363."                                                                                                                                        
[12] "Erikson, Robert; John Goldthorpe and Lucienne Portocarero. 1979. `International Class Mobility in Three Western European Societies: England, France and Sweden.\"British Journal of Sociology 30: 415-441.\""                                                          
[13] "Пустовалов А. В. Пресса Великобритании: между «бумажным» прошлым и цифровым будущим//Вестник Пермского университета. Российская и зарубежная филология. 2013. Вып. 3(23). С. 191-295. URL: http://psujourn.narod.ru/profs/pustovalov.htm (дата обращения: 02.04.2013)."
[14] "И. Н. Панарин. Информационная война и геополитика. М., 2006. С. 172"                                                                                                                                                                                                   
[15] "Ирина Голицына. Информационно-коммуникационные технологии в современном образовании. Некоторые аспекты информатизации образования. -LAP LAMBERT Academic Publishing. -2012.-134 с."                                                                                    
> 

The details are considered in the function ISIname:

ISIname <- function(s) {
#   s <- gsub("\\.","\\.",s)  # cyrillic . -> .
   s <- gsub("–","-",s) # cyrillic – -> -
   z <- as.integer(regmatches(s,gregexpr("\\d{4}",s,perl=TRUE))[[1]])
   z <- z[(1800<=z)&(z<=2018)]
   au <- NA; vl <- NA; is <- NA; bp <- NA; ep <- NA; py <- 0; so <- "work" 
   urlN <- regmatches(s,regexpr(urlP,s))
   if(length(z)>0){
      s <- gsub("  "," ",s)      
      y <- regmatches(s,gregexpr("\\(\\d{4}\\)", s, perl=TRUE))[[1]]
      if(length(y)>0){ py <- as.integer(substr(y,2,5))
      } else { py <- z[length(z)] }
      p <- regmatches(s,gregexpr("\\d+-\\d+", s, perl=TRUE))[[1]]
      np <- length(p)
      if(np>0){
         s <- gsub(p[np],"",s)
         pg <- as.integer(unlist(strsplit(p[np],"-")))
         bp <- pg[1]; ep <- pg[2]
      }
      s <- gsub("№ (\\d+)","№\\1",s,perl=TRUE)
      s <- gsub("Т. (\\d+)","№\\1",s,perl=TRUE) # cyrillic Т  !!!
      s <- gsub("vol. (\\d+)","№\\1",s,perl=TRUE)
      p <- regmatches(s,gregexpr("№\\d+",s,perl=TRUE))[[1]]
      np <- length(p)
      if(np>0){ q <- p[np]
         vl <- as.integer(substr(q,2,nchar(q)))
         s <- gsub(q," ",s)
      }
      a <- gsub("(\\D)\\. (.)\\.","\\1.\\2\\. ",s,perl=TRUE)
      a <- strsplit(a," & ",perl=TRUE)[[1]][1]
      a <- strsplit(a," and ",perl=TRUE)[[1]][1]
      L <- strsplit(gsub(",","",a),"\\. ")[[1]]
      if(length(L)>1){ sec <- substr(L[1],2,2)
         if(sec==".") au <- paste(trimws(L[2]),L[1])
         else au <- paste(L[1],trimws(L[2]))
      } else au <- ifelse(nchar(L[1])<30,L[1],substr(L[1],1,30))
      L <- strsplit(au," ",perl=TRUE)[[1]]
      if(length(L)>1){
         if(str_detect(L[2],".")) au <- paste(L[1],L[2])
         else if(length(L)>2) au <- paste(L[1],L[2],L[3])
      }
   }
   if(is.na(au)){if(length(urlN)>0){U <- strsplit(urlN[1],"//")[[1]]
      au <- substr(U[2],1,20)
   } else au <- "*unknown"}
   return(list(au=au,py=py,so=so,vl=vl,is=is,bp=bp,ep=ep))
}

Some characters (A, T, ., -, …) can have two codes Latin and Cyrillic!!!

> a <- "275–293."
> a
[1] "275–293."
> utf8ToInt(a)
[1] NA
> b <- gsub("–","-",a)
> b
[1] "275-293."
> utf8ToInt(b)
[1] 50 55 53 45 50 57 51 46

During processing some problems were eliminated by manually “correcting” the data.

  • a strange character appears in the row
49772 Петрова Н. Экономика сходит на net//Коммерсантъ Деньги. 2011. 􀀀20 (827). URL: http://www.kommersant.ru/doc/1637659.
  • Some rows start with -., –, —- indicating that the row has the same author(s) as the previous one
50176	Suitor, J.J. 1987a. Friendship networks in transition: Married mothers return to school. Journal of Social and Personal Relationships 4 (4): 445-61.
50177	-- 1987b. Mother-daughter relations when married daughters return to school: Effects of status similarity. Journal of Marriage and Family 49 (2): 435-44.
50178	-- 1988. Husbands" educational attainment and support for wives" return to school. Gender & Society 2 (4): 482-95.

Program

library(stringi)
library(stringr)
library(readxl)
wdir <- "C:/Users/batagelj/work/Python/WoS/daria/Excel"
setwd(wdir)

urlP <- " ?(f|ht)(tp)(s?)(://)(.*)[.|/](.*)"
E <- c(
"Левин М.И., Цирик М.Л. Математическое моделирование коррупции//Экономика и математические методы. 1998. Т. 34. Вып. 4.",
"Fleurbaey M. Beyond GDP: The Quest for a Measure of Social Welfare. Journal of Economic Literature, 2009, vol. 47, no. 4, pp. 1029-1046.",
"Cooren, F. Acting and organizing: How speech acts structure organizational interactions. Concepts and Transformation, 2001a, 6(3), 275–293.",
"Bian, Y. Bringing strong ties back in: Indirect ties, network bridges, and job searches in China. American Sociological Review, 1997, 62, 366–385.",
"Denis, J.-L., Langley, A. & Cazale, L.   Leadership and strategic change under ambiguity. Organization Studies, 1996, 17, 673-699.",
"Ingram, Paul, Jeffrey Robinson, and Marc L. Busch. 2005. The intergovernmental network of world trade: IGO connectedness, governance and embeddedness. American Journal of Sociology 111 (3): 824-58.",
"Leeds, Brett Ashley, Andrew G. Long, and Sara McLaughlin Mitchell. 2000. Reevaluating alliance reliability: Specific threats, specific promises. Journal of Conflict Resolution 44 (5): 686-99.",
'W. Chen, Y. Wang, and S. Yang, "Efficient influence maximization in social networks," in KDD 2009.',
"Alexander Fischer. Blog. URL: http://blog.alexander-fischer.org/kategorie/persoenlich/(дата обращения 04.02.2012).",
"Холмогорова, Н.Г. Гаранян, Г.А. Петрова//Социальная и клиническая психиатрия. -2003. -Т. 13. -№ 2. -С. 15-24.",
'Snijders, T.A.B. & Bosker, R.J. (1994) `Modeled Variance in Two-level Models", Sociological Methods and Research 22: 342-363.',
'Erikson, Robert; John Goldthorpe and Lucienne Portocarero. 1979. `International Class Mobility in Three Western European Societies: England, France and Sweden."British Journal of Sociology 30: 415-441."',
"Ирина Голицына. Информационно-коммуникационные технологии в современном образовании. Некоторые аспекты информатизации образования. -LAP LAMBERT Academic Publishing. -2012.-134 с."
)

auName <- function(au){
   if(str_detect(au,"\\.")) return(au)
   a <- strsplit(au," ")[[1]]
   n <- length(a)
   if(n<2) return(au)
   i <- which(nchar(a)>3)
   if(length(i)>0)j <- i[1] else j <- 1 
   if(j<n) return(paste(a[1:j],paste(substr(a[(j+1):n],1,1),collapse=""),sep=", "))
   return(au)
}

toLatin <- function(S) 
   gsub('"',"'",gsub(intToUtf8(697),"'",
   stri_trans_general(gsub("ъ","ʹ",S),"cyrillic-latin;nfd;[:nonspacing mark:] remove;nfc")))
      
#  gsub("ʹ","'",stri_trans_general(S,"cyrillic-latin;nfd;[:nonspacing mark:] remove;nfc"))
#  stri_trans_general(gsub("ь","'",gsub("ъ","'",S)),"cyrillic-latin;nfd;[:nonspacing mark:] remove;nfc")

ISIname <- function(s) {
#   s <- gsub("\\.","\\.",s)  # cyrillic . -> .
   s <- gsub("–","-",s) # cyrillic – -> -
   z <- as.integer(regmatches(s,gregexpr("\\d{4}",s,perl=TRUE))[[1]])
   z <- z[(1800<=z)&(z<=2018)]
   au <- NA; vl <- NA; is <- NA; bp <- NA; ep <- NA; py <- 0; so <- "work" 
   urlN <- regmatches(s,regexpr(urlP,s))
   if(length(z)>0){
      s <- gsub("  "," ",s)      
      y <- regmatches(s,gregexpr("\\(\\d{4}\\)", s, perl=TRUE))[[1]]
      if(length(y)>0){ py <- as.integer(substr(y,2,5))
      } else { py <- z[length(z)] }
      p <- regmatches(s,gregexpr("\\d+-\\d+", s, perl=TRUE))[[1]]
      np <- length(p)
      if(np>0){
         s <- gsub(p[np],"",s)
         pg <- as.integer(unlist(strsplit(p[np],"-")))
         bp <- pg[1]; ep <- pg[2]
      }
      s <- gsub("№ (\\d+)","№\\1",s,perl=TRUE)
      s <- gsub("Т. (\\d+)","№\\1",s,perl=TRUE) # cyrillic Т  !!!
      s <- gsub("vol. (\\d+)","№\\1",s,perl=TRUE)
      p <- regmatches(s,gregexpr("№\\d+",s,perl=TRUE))[[1]]
      np <- length(p)
      if(np>0){ q <- p[np]
         vl <- as.integer(substr(q,2,nchar(q)))
         s <- gsub(q," ",s)
      }
      a <- gsub("(\\D)\\. (.)\\.","\\1.\\2\\. ",s,perl=TRUE)
      a <- strsplit(a," & ",perl=TRUE)[[1]][1]
      a <- strsplit(a," and ",perl=TRUE)[[1]][1]
      L <- strsplit(gsub(",","",a),"\\. ")[[1]]
      if(length(L)>1){ sec <- substr(L[1],2,2)
         if(sec==".") au <- paste(trimws(L[2]),L[1])
         else au <- paste(L[1],trimws(L[2]))
      } else au <- ifelse(nchar(L[1])<30,L[1],substr(L[1],1,30))
      L <- strsplit(au," ",perl=TRUE)[[1]]
      if(length(L)>1){
         if(str_detect(L[2],".")) au <- paste(L[1],L[2])
         else if(length(L)>2) au <- paste(L[1],L[2],L[3])
      }
   }
   if(is.na(au)){if(length(urlN)>0){U <- strsplit(urlN[1],"//")[[1]]
      au <- substr(U[2],1,20)
   } else au <- "*unknown"}
   au <- auName(toLatin(au))
   return(list(au=au,py=py,so=so,vl=vl,is=is,bp=bp,ep=ep))
}

aFile <- "C:/Users/batagelj/Documents/2017/malceva/elib/Articles.xlsx"
atypes <- c("text","text","skip","skip","text",rep("skip",4),rep("text",5),rep("skip",11))
af <- read_excel(aFile,sheet=1,col_types=atypes)
dim(af)
(nb <- nrow(af))
short <- vector(mode="character",length=nb)
Last <- vector(mode="character",length=nb)
Init <- vector(mode="character",length=nb)
S <- strsplit(af[[3]]," ") 

for(i in 1:nb) {Last[i] <- substr(S[[i]][1],1,8); Init[i] <- substr(S[[i]][2],1,1)} 
for(i in 1:nb) short[i] <- paste(Last[i],"_",Init[i],"(",af[i,6],")",af[i,5],":",af[i,7],sep="")
isi <- vector(mode="character",length=nb)
for(i in 1:nb) isi[i] <- paste(af[i,3],", ",af[i,6],", ",substr(af[i,4],1,20),", V",af[i,5],", P",af[i,7],sep="") 

waFile <- "C:/Users/batagelj/Documents/2017/malceva/elib/WA.net"
N <- as.vector(read.table(waFile,sep=" ",skip=4,nrows=9207,stringsAsFactors=FALSE)$V2)
Encoding(N) <- "UTF-8"
links <- read.table(waFile,col.names=c("u","v"),skip=9212)

excel_sheets("cited.xls")
types <- c("numeric","text","skip","text","skip","skip")                                                    
df <- read_excel("cited.xls",sheet=1,col_types=types)
dim(df)
S <- as.vector(unlist(df[,2]))
Encoding(S) <- "UTF-8"
(nc <- length(S))
head(S)
pId <- as.character(as.integer(df[[3]]))

iDic <- new.env(hash=TRUE,parent=emptyenv())
rin <- 0; lin <- 1; cpid <- pId[1]
for(cp in 1:nc) if(cpid==pId[cp]) {rin <- rin+1} else {
   assign(cpid,list(i=lin,j=rin),env=iDic)
   lin <- rin+1; rin <- lin; cpid <- pId[cp]
}
get("16519995",env=iDic,inherits=FALSE)

wos <- file("basic.wos","w"); cit <- file("cited.wos","w")
skip <- file("skiped.txt","w"); cat(date(),"\n")
write(paste("**\n** Conversion of elibrary.ru data into WoS format\n** ",
   date(),"\n*T 1\nFN eLib2WoS\nVR 1.0\n",sep=""),wos)
noref <- 0; k <-1; nl <- nrow(links); ni <- 0; ns <- 0
N <- toLatin(N)
for(i in 1:nb){
   write(paste("PT J",sep=""),wos)
   if(k<=nl){found <- FALSE
      if(links$u[k]==i){found <- TRUE
         writeLines(paste("AU ",auName(N[links$v[k]]),sep=""),wos,useBytes=T) 
         repeat{ 
            k <- k+1
            if(k>nl) break
            if(links$u[k]>i) break
            writeLines(paste("   ",auName(N[links$v[k]]),sep=""),wos,useBytes=T)
         }       
      }
   }
   if(!found && !is.na(af[[3]][i]))writeLines(paste("AU ",auName(toLatin(af[[3]][i])),sep=""),wos,useBytes=T)
   writeLines(paste("TI ",toLatin(af[[2]][i]),sep=""),wos,useBytes=T)
   writeLines(paste("J9 ",toLatin(af[[4]][i]),sep=""),wos,useBytes=T)
   cid <- af[[1]][i]
   if(exists(cid,env=iDic,inherits=FALSE)){
      code <- "CR "
      ij <- as.vector(unlist(get(cid,env=iDic,inherits=FALSE)))
      for(j in ij[1]:ij[2]){
         D <- ISIname(S[j])
         if(D$py==0){ ns <- ns+1
            writeLines(paste(i,j,S[j]),skip,useBytes=T)
         } 
         writeLines(paste(code,D$au,", ",D$py,", ",substr(D$so,1,20),
            ", V",D$vl,", P",D$bp,sep=""),wos,useBytes=T)
#         writeLines(paste("   ",S[j],sep=""),wos,useBytes=T)
         code <- "   "; ni <- ni+1
      }
   } else {
      if((noref %% 10)==0) cat("\n")
      noref <- noref+1; cat(" ",cid)
   }
   write(paste("PY ",af[[6]][i],sep=""),wos)
   write(paste("VL ",af[[5]][i],sep=""),wos)
#  write(paste("IS ",af[[2]][i],sep=""),wos)
   write(paste("BP ",af[[7]][i],sep=""),wos)
   write(paste("EP ",af[[8]][i],sep=""),wos)
   write(paste("UT ELIB:",cid,sep=""),wos)
   writeLines(paste("UT short:",N[i],sep=""),wos,useBytes=T)
   write(paste("ER \n",sep=""),wos)
}
cat("\nnc=",nc,"  ni=",ni,"  ns=",ns,"  ne=",noref,"\n",date(),"\n")
close(wos); close(cit); close(skip)

Attention: Change the paths to files.

To do:

  • write a semi-automatic program for extracting WoS components from e-Library references
  • write a program to transform Scopus bibliography to WoS
ru/cite.txt · Last modified: 2017/10/13 18:17 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki