====== Keywords through time ====== Boundary problem: nodes that remain have DC > 0, or DC=0 and (Cite_indeg >= 3); alternatively, do not remove any ===== Constructing temporal network WKti and its slices ===== > py <- read.table(file="Yearb.clu",header=FALSE,skip=1,colClasses="character")$V1 > length(py) [1] 45917 > n1 <- 45917; n2 <- 36275; n <- n1+n2; ma <- 371224 > nodes <- read.table(file="WKb.net",header=FALSE,skip=1,nrows=n,colClasses="character") > dim(nodes) [1] 82192 5 > head(nodes) V1 V2 V3 V4 V5 1 1 CHALMERS_I(1989): 0.0000 0.0000 0.5000 2 2 CHALMERS_T(1981)2:31 0.0000 0.0000 0.5000 3 3 DETSKY_A(1992)45:255 0.0000 0.0000 0.5000 4 4 FEINSTEI_A(1987): 0.0000 0.0000 0.5000 5 5 FISHER_M(1994)272:143 0.0000 0.0000 0.5000 6 6 FLEISS_J(1991)44:127 0.0000 0.0000 0.5000 > arcs <- read.table(file="WKb.net",header=FALSE,skip=2+n,colClasses="character") > dim(arcs) [1] 371224 3 > head(arcs) V1 V2 V3 1 18 45918 1 2 18 45919 1 3 18 45920 1 4 18 45921 1 5 18 45922 1 6 18 45923 1 > tfirst <- 1900; tlast <- 2017 > net <- file("WKti.net","w") > cat('*vertices',n,n1,'\n',sep=' ',file=net) > for(v in 1:n1) cat(nodes$V1[v],' "',nodes$V2[v],'" [',py[v],']\n',sep='',file=net) > for(v in 1:n2) cat(nodes$V1[n1+v],' "',nodes$V2[n1+v],'" [',tfirst,'-',tlast,']\n',sep='',file=net) > cat('*arcs\n',file=net) > m <- nrow(arcs) > m [1] 371224 > for(a in 1:m) cat(arcs$V1[a],arcs$V2[a],arcs$V3[a],'[',py[as.integer(arcs$V1[a])],']\n',file=net) > close(net) Analysis of the individual slices in Pajek.
===== Constructing frequency table in R =====

  setwd("C:\\Users\\batagelj\\work\\Python\\WoS\\peere2\\keys")

  # Look up `key` in the environment `dict` and return the value stored
  # under it. A key that is not yet present is first registered with the
  # value length(dict)+1, i.e. the next free index, so repeated calls
  # number distinct keys 1, 2, 3, ... in order of first appearance.
  #   key  - character scalar used as the variable name inside `dict`
  #   dict - environment used as a dictionary (only `dict` itself is
  #          searched; parents are ignored via inherits=FALSE)
  getInd <- function(key, dict) {
    if (!exists(key, envir = dict, inherits = FALSE)) {
      assign(key, length(dict) + 1, envir = dict)
    }
    get(key, envir = dict, inherits = FALSE)
  }

  # Read one slice network together with its cluster file and return the
  # cluster values of the vertices listed after the first n1 vertices.
  #   fnet - network file; its first line must carry the total vertex
  #          count as 2nd token and the first-mode count n1 as 3rd token
  #          (the "*vertices n n1" header)
  #   fclu - cluster file; one header line and the first n1 values are
  #          skipped, the remaining values are read as integers
  #   dict - dictionary environment mapping vertex labels to indices;
  #          defaults to the global K built by the script below
  #   size - length of the preallocated result; defaults to the global nk
  # Returns an integer vector with d[i] = ideg[getInd(label_i, dict)].
  # Labels missing from `dict` are registered in it by getInd.
  # NOTE(review): the value is stored at the slice position i but looked
  # up at the dictionary position j. This is only consistent when the
  # slice lists its labels in the same order as `dict` (j == i) - confirm
  # for the K*.net / F*.clu files.
  addTime <- function(fnet, fclu, dict = K, size = nk) {
    net <- file(fnet, "r")
    # Close the connection even when parsing below fails.
    on.exit(close(net), add = TRUE)
    header <- unlist(strsplit(readLines(net, n = 1), "[[:space:]]+"))
    n <- as.integer(header[2]); n1 <- as.integer(header[3]); n2 <- n - n1
    if (is.na(n) || is.na(n1) || n2 < 0) {
      stop("cannot parse vertex counts from the first line of ", fnet, call. = FALSE)
    }
    # The connection is positioned after the header line, so skipping n1
    # rows leaves the n2 remaining vertex rows.
    S <- read.table(net, skip = n1, nrows = n2, fill = TRUE,
      colClasses = list("integer", "character", "numeric", "numeric", "numeric", "character"))
    ideg <- as.integer(read.table(file = fclu, header = FALSE, skip = 1 + n1,
      colClasses = "character")$V1)
    d <- integer(size)
    # seq_len() keeps the loop empty when the slice has no such vertices.
    for (i in seq_len(n2)) {
      j <- getInd(S$V2[i], dict)
      d[i] <- ideg[j]
    }
    d
  }

  # Read the vertices after the first n1 from the full network WKti.net
  # and number their labels in the dictionary K; t holds their values
  # from tot.clu.
  net <- file("WKti.net","r")
  LL <- unlist(strsplit(L <- readLines(net,n=1),"[[:space:]]+"))
  n <- as.integer(LL[2]); n1 <- as.integer(LL[3]); nk <- n-n1
  S <- read.table(net,skip=n1,nrows=nk,fill=TRUE,colClasses=list("integer","character",
    "numeric","numeric","numeric","character"))
  close(net)
  ideg <- as.integer(read.table(file="tot.clu",header=FALSE,skip=1+n1,colClasses="character")$V1)
  K <- new.env(hash=TRUE,parent=emptyenv())
  t <- integer(nk)
  for(i in seq_len(nk)) {j <- getInd(S$V2[i],K); t[i] <- ideg[j]}
  # One column per slice file pair.
  f1 <- addTime('K1.net','F1.clu')
  f2 <- addTime('K2.net','F2.clu')
  f3 <- addTime('K3.net','F3.clu')
  f4 <- addTime('K4.net','F4.clu')
  f5 <- addTime('K5.net','F5.clu')
  f6 <- addTime('K6.net','F6.clu')
  f7 <- addTime('K7.net','F7.clu')
  A <- data.frame(K=S$V2,t,f1,f2,f3,f4,f5,f6,f7)
  save(A,file="Keywords.Rdata")

===== Analyzing frequency table =====

  > setwd("C:\\Users\\batagelj\\work\\Python\\WoS\\peere2\\keys")
  > load("Keywords.Rdata")
  > head(A)
                K    t f1 f2 f3  f4  f5  f6  f7
  1       control  930  1  7  7 125 156 175 459
  2 meta-analysis  673  0  0  1  43  72 134 423
  3     necessary   17  0  2  1   2   2   3   7
  4     randomize 1193  0  0  6  73 146 330 638
  5  metaanalysis  394  0  0  1  47  44  67 235
  6        assess  250  0  2  6  29  27  64 122
  > nk <- nrow(A)
  > nk
  [1] 36275
  > Q <- A[2:9]
  > s
<- apply(Q,2,sum) > s t f1 f2 f3 f4 f5 f6 f7 371224 916 2257 4064 34511 42757 91495 195224 > for(i in 1:8) Q[i] <- nk*Q[i]/s[i] > Q[1:20,] t f1 f2 f3 f4 f5 f6 f7 1 90.8770715 39.60153 112.50554 62.481545 131.389267 132.3502584 69.3822067 85.2877976 2 65.7637303 0.00000 0.00000 8.925935 45.197908 61.0847347 53.1269468 78.5985586 3 1.6611938 0.00000 32.14444 8.925935 2.102228 1.6967982 1.1894093 1.3006854 4 116.5767165 0.00000 0.00000 53.555610 76.731332 123.8662675 130.8350183 118.5481806 5 38.5006088 0.00000 0.00000 8.925935 49.402364 37.3295601 26.5634734 43.6658659 6 24.4293203 0.00000 32.14444 53.555610 30.482310 22.9067755 25.3740642 22.6690878 7 141.3969059 0.00000 48.21666 53.555610 174.484947 207.8577777 124.8879720 132.2982830 8 116.0881301 0.00000 64.28888 80.333415 158.718235 150.1666394 123.3020930 99.5953366 9 203.7405313 0.00000 80.36110 62.481545 134.542610 166.2862221 186.7372534 237.4679855 10 107.1958575 0.00000 48.21666 44.629675 140.849294 105.2014875 116.1656375 99.9669610 11 72.9948091 3524.53603 128.57776 169.592766 83.038017 78.0527165 68.1927974 53.5139122 12 49.2495097 39.60153 32.14444 98.185285 62.015734 49.2071474 59.0739931 41.6219317 13 35.2759385 0.00000 16.07222 8.925935 33.635652 27.1487710 34.4928685 38.6489366 14 2.2474975 0.00000 0.00000 26.777805 8.408913 0.0000000 1.9823488 1.3006854 15 209.0172645 118.80459 225.01108 285.629921 352.123236 249.4293332 218.4548336 169.0890977 16 34.6896348 0.00000 0.00000 8.925935 32.584538 26.3003719 34.8893382 37.9056878 17 0.2931518 0.00000 0.00000 0.000000 1.051114 1.6967982 0.0000000 0.0000000 18 0.4885864 0.00000 0.00000 0.000000 1.051114 0.8483991 0.7929395 0.1858122 19 0.4885864 0.00000 0.00000 0.000000 1.051114 0.8483991 0.7929395 0.1858122 20 40.3572371 39.60153 0.00000 26.777805 53.606821 38.1779592 38.0610962 40.3212464 > nz <- apply(Q>0,1,sum)-1 > nz[1:20] [1] 7 5 6 5 5 6 6 6 6 6 7 7 6 4 7 5 2 4 4 6 > A$K[1:20] [1] control meta-analysis necessary randomize metaanalysis [6] assess 
trial clinical health assessment [11] report article technology blinding quality [16] pain pattern-recognition electroencephalography augmentative communication > P <- as.matrix(Q[2:8]) > # for(i in 1:7) P[,i] <- (P[,i]/Q$t)*log(P[,i]) > head(P) f1 f2 f3 f4 f5 f6 f7 [1,] 39.60153 112.50554 62.481545 131.389267 132.350258 69.382207 85.287798 [2,] 0.00000 0.00000 8.925935 45.197908 61.084735 53.126947 78.598559 [3,] 0.00000 32.14444 8.925935 2.102228 1.696798 1.189409 1.300685 [4,] 0.00000 0.00000 53.555610 76.731332 123.866268 130.835018 118.548181 [5,] 0.00000 0.00000 8.925935 49.402364 37.329560 26.563473 43.665866 [6,] 0.00000 32.14444 53.555610 30.482310 22.906775 25.374064 22.669088 > P[is.nan(P)] <- 0 > head(P) f1 f2 f3 f4 f5 f6 f7 [1,] 39.60153 112.50554 62.481545 131.389267 132.350258 69.382207 85.287798 [2,] 0.00000 0.00000 8.925935 45.197908 61.084735 53.126947 78.598559 [3,] 0.00000 32.14444 8.925935 2.102228 1.696798 1.189409 1.300685 [4,] 0.00000 0.00000 53.555610 76.731332 123.866268 130.835018 118.548181 [5,] 0.00000 0.00000 8.925935 49.402364 37.329560 26.563473 43.665866 [6,] 0.00000 32.14444 53.555610 30.482310 22.906775 25.374064 22.669088 > Keys <- as.character(A$K) > i1 <- order(P[,1],decreasing=TRUE) > i2 <- order(P[,2],decreasing=TRUE) > i3 <- order(P[,3],decreasing=TRUE) > i4 <- order(P[,4],decreasing=TRUE) > i5 <- order(P[,5],decreasing=TRUE) > i6 <- order(P[,6],decreasing=TRUE) > i7 <- order(P[,7],decreasing=TRUE) > K1 <- Keys[i1] > K2 <- Keys[i2] > K3 <- Keys[i3] > K4 <- Keys[i4] > K5 <- Keys[i5] > K6 <- Keys[i6] > K7 <- Keys[i7] > R <- cbind(K1,K2,K3,K4,K5,K6,K7) > R[1:20,] K1 K2 K3 K4 K5 K6 K7 [1,] "referee" "review" "peer-review" "review" "review" "review" "review" [2,] "report" "peer" "referee" "peer" "peer" "peer" "systematic" [3,] "recommendation" "referee" "journal" "peer-review" "research" "research" "peer" [4,] "medical" "peer-review" "review" "quality" "quality" "quality" "health" [5,] "subcommittee" "care" "peer" "research" "trial" 
"health" "research" [6,] "committee" "reply" "process" "journal" "journal" "journal" "quality" [7,] "act" "quality" "research" "referee" "health" "systematic" "management" [8,] "insurance" "medical" "editorial" "medical" "management" "management" "care" [9,] "subcommittee-b" "method" "reviewer" "care" "referee" "publication" "study" [10,] "subcommittee-c" "role" "quality" "publication" "publication" "care" "impact" [11,] "bankruptcy" "comment" "medical" "trial" "clinical" "treatment" "use" [12,] "subcommittee-d" "research" "scientific" "clinical" "medical" "patient" "analysis" [13,] "subcommittee-a" "use" "science" "management" "analysis" "analysis" "trial" [14,] "science" "study" "publication" "science" "science" "impact" "literature" [15,] "compensation" "scientific" "reply" "assessment" "care" "randomize" "patient" [16,] "scientific" "journal" "editor" "study" "therapy" "therapy" "intervention" [17,] "workman" "impact" "policy" "therapy" "control" "trial" "therapy" [18,] "national" "science" "manuscript" "health" "disease" "clinical" "journal" [19,] "review" "process" "report" "analysis" "patient" "science" "outcome" [20,] "peer" "experience" "author" "use" "randomize" "study" "risk" > > index <- function(x,const) return (x*(1-x)**const) > curve(index(x,3),from=0,to=1,n=50) > curve(index(x,0.3),from=0,to=1,n=50) > curve(index(x,0.1),from=0,to=1,n=50) {{notes:pics:index3.png?300}} {{notes:pics:index03.png?300}} {{notes:pics:index01.png?300}} > P <- as.matrix(Q[2:8]) > for(i in 1:7) {maxp <- max(P[,i]); cat(i,'max=',maxp,'\n'); P[,i] <- (P[,i]/Q$t)*index(P[,i]/maxp,0.1)} 1 max= 7128.275 2 max= 3760.899 3 max= 5775.08 4 max= 797.7956 5 max= 739.804 6 max= 695.0115 7 max= 687.6909 > head(P) f1 f2 f3 f4 f5 f6 f7 [1,] 0.002419598 0.03692179 0.0074305125 0.233861404 0.255457253 0.075419288 0.114861787 [2,] 0.000000000 0.00000000 0.0002097470 0.038710225 0.076036062 0.061262934 0.134951716 [3,] 0.000000000 0.16524446 0.0083035149 0.003333751 0.002342198 0.001225112 
0.001480637 [4,] 0.000000000 0.00000000 0.0042563320 0.062668635 0.174670521 0.206912393 0.172015451 [5,] 0.000000000 0.00000000 0.0003582735 0.078951438 0.048671289 0.026267438 0.071544162 [6,] 0.000000000 0.01123662 0.0203112164 0.047489853 0.028942356 0.037779959 0.030486507 > P[is.nan(P)] <- 0 > head(P) f1 f2 f3 f4 f5 f6 f7 [1,] 0.002419598 0.03692179 0.0074305125 0.233861404 0.255457253 0.075419288 0.114861787 [2,] 0.000000000 0.00000000 0.0002097470 0.038710225 0.076036062 0.061262934 0.134951716 [3,] 0.000000000 0.16524446 0.0083035149 0.003333751 0.002342198 0.001225112 0.001480637 [4,] 0.000000000 0.00000000 0.0042563320 0.062668635 0.174670521 0.206912393 0.172015451 [5,] 0.000000000 0.00000000 0.0003582735 0.078951438 0.048671289 0.026267438 0.071544162 [6,] 0.000000000 0.01123662 0.0203112164 0.047489853 0.028942356 0.037779959 0.030486507 > Keys <- as.character(A$K) > i1 <- order(P[,1],decreasing=TRUE) > i2 <- order(P[,2],decreasing=TRUE) > i3 <- order(P[,3],decreasing=TRUE) > i4 <- order(P[,4],decreasing=TRUE) > i5 <- order(P[,5],decreasing=TRUE) > i6 <- order(P[,6],decreasing=TRUE) > i7 <- order(P[,7],decreasing=TRUE) > K1 <- Keys[i1] > K2 <- Keys[i2] > K3 <- Keys[i3] > K4 <- Keys[i4] > K5 <- Keys[i5] > K6 <- Keys[i6] > K7 <- Keys[i7] > R <- cbind(K1,K2,K3,K4,K5,K6,K7) > R[1:20,] K1 K2 K3 K4 K5 [1,] "subcommittee" "peer" "referee" "peer-review" "peer" [2,] "recommendation" "referee" "reply" "peer" "research" [3,] "subcommittee-b" "right-of-access" "weigh" "quality" "trial" [4,] "subcommittee-c" "psro" "nsf" "journal" "quality" [5,] "report" "peer-review" "balance" "referee" "journal" [6,] "subcommittee-d" "rx" "editorial" "medical" "medical" [7,] "subcommittee-a" "optional" "ceci" "research" "clinical" [8,] "bankruptcy" "anonymous" "racism" "publication" "control" [9,] "workman" "national-institutes-of-health" "patrick" "care" "referee" [10,] "jurisdiction" "continuing-education" "antitrust" "assurance" "publication" [11,] "goodsir" "jr." 
"academic-freedom" "audit" "science" [12,] "paget" "rubin" "peter" "clinical" "management" [13,] "nystagmus" "cole" "difficult" "trial" "analysis" [14,] "1906" "american-society-for-clinical-investigation" "anonymous" "physician" "disease" [15,] "malinger" "adamha" "process" "control" "therapy" [16,] "insurance" "child-psychiatry" "antitrust-law" "reliability" "health" [17,] "james" "problem-oriented" "risk-benefits" "process" "randomize" [18,] "miner" "nisbet" "exclusionary" "assessment" "patient" [19,] "sir" "royal-australasian-college-of-radiologists" "tabloid" "evaluation" "new" [20,] "act" "community-hospital" "harnad" "manuscript" "care" K6 K7 [1,] "peer" "systematic" [2,] "research" "health" [3,] "quality" "research" [4,] "journal" "study" [5,] "health" "management" [6,] "treatment" "care" [7,] "publication" "peer" [8,] "randomize" "intervention" [9,] "management" "impact" [10,] "patient" "protocol" [11,] "therapy" "use" [12,] "impact" "literature" [13,] "clinical" "quality" [14,] "analysis" "outcome" [15,] "assessment" "analysis" [16,] "systematic" "patient" [17,] "medical" "risk" [18,] "science" "trial" [19,] "care" "therapy" [20,] "outcome" "randomize" > > P <- as.matrix(Q[2:8]) > for(i in 1:7) {maxp <- max(P[,i]); cat(i,'max=',maxp,'\n'); P[,i] <- (P[,i]/Q$t)*index(P[,i]/maxp,0.3)} ... 
> R[1:20,] K1 K2 K3 K4 K5 [1,] "subcommittee" "referee" "referee" "peer-review" "peer" [2,] "recommendation" "right-of-access" "weigh" "peer" "research" [3,] "subcommittee-b" "psro" "reply" "quality" "trial" [4,] "subcommittee-c" "peer" "nsf" "journal" "quality" [5,] "subcommittee-d" "peer-review" "balance" "referee" "journal" [6,] "report" "rx" "ceci" "medical" "medical" [7,] "subcommittee-a" "optional" "editorial" "research" "control" [8,] "bankruptcy" "anonymous" "racism" "publication" "clinical" [9,] "workman" "national-institutes-of-health" "patrick" "assurance" "referee" [10,] "jurisdiction" "continuing-education" "antitrust" "care" "publication" [11,] "goodsir" "jr." "academic-freedom" "audit" "science" [12,] "paget" "rubin" "peter" "clinical" "management" [13,] "nystagmus" "cole" "difficult" "trial" "analysis" [14,] "1906" "american-society-for-clinical-investigation" "anonymous" "physician" "disease" [15,] "malinger" "adamha" "antitrust-law" "reliability" "therapy" [16,] "insurance" "child-psychiatry" "risk-benefits" "control" "health" [17,] "james" "problem-oriented" "exclusionary" "process" "randomize" [18,] "miner" "nisbet" "tabloid" "assessment" "new" [19,] "sir" "royal-australasian-college-of-radiologists" "harnad" "evaluation" "patient" [20,] "act" "community-hospital" "ceprime" "manuscript" "care" K6 K7 [1,] "peer" "systematic" [2,] "research" "health" [3,] "quality" "research" [4,] "journal" "study" [5,] "health" "management" [6,] "treatment" "care" [7,] "publication" "intervention" [8,] "randomize" "protocol" [9,] "management" "peer" [10,] "patient" "impact" [11,] "therapy" "use" [12,] "impact" "literature" [13,] "clinical" "quality" [14,] "analysis" "outcome" [15,] "assessment" "analysis" [16,] "medical" "patient" [17,] "science" "risk" [18,] "systematic" "therapy" [19,] "care" "trial" [20,] "outcome" "controlled-trial" > > P <- as.matrix(Q[2:8]) > for(i in 1:7) {maxp <- max(P[,i]); cat(i,'max=',maxp,'\n'); P[,i] <- 
(P[,i]/Q$t)*index(P[,i]/maxp,0.5)} ... > R[1:20,] K1 K2 K3 K4 K5 [1,] "subcommittee" "right-of-access" "referee" "peer-review" "peer" [2,] "subcommittee-b" "referee" "weigh" "peer" "trial" [3,] "subcommittee-c" "psro" "reply" "quality" "research" [4,] "recommendation" "peer-review" "nsf" "journal" "quality" [5,] "subcommittee-d" "rx" "balance" "referee" "journal" [6,] "subcommittee-a" "optional" "ceci" "medical" "medical" [7,] "report" "anonymous" "racism" "research" "control" [8,] "bankruptcy" "national-institutes-of-health" "editorial" "assurance" "clinical" [9,] "workman" "continuing-education" "patrick" "publication" "referee" [10,] "jurisdiction" "peer" "antitrust" "audit" "publication" [11,] "goodsir" "jr." "academic-freedom" "care" "science" [12,] "paget" "rubin" "peter" "clinical" "management" [13,] "nystagmus" "cole" "difficult" "physician" "analysis" [14,] "1906" "american-society-for-clinical-investigation" "anonymous" "trial" "disease" [15,] "malinger" "adamha" "antitrust-law" "reliability" "therapy" [16,] "insurance" "child-psychiatry" "risk-benefits" "control" "randomize" [17,] "james" "problem-oriented" "exclusionary" "process" "health" [18,] "miner" "nisbet" "tabloid" "assessment" "new" [19,] "sir" "royal-australasian-college-of-radiologists" "harnad" "evaluation" "patient" [20,] "act" "community-hospital" "ceprime" "manuscript" "literature" K6 K7 [1,] "peer" "systematic" [2,] "research" "health" [3,] "quality" "research" [4,] "journal" "study" [5,] "treatment" "management" [6,] "health" "care" [7,] "publication" "intervention" [8,] "randomize" "protocol" [9,] "management" "impact" [10,] "patient" "peer" [11,] "therapy" "use" [12,] "clinical" "literature" [13,] "impact" "outcome" [14,] "analysis" "analysis" [15,] "assessment" "quality" [16,] "medical" "patient" [17,] "science" "risk" [18,] "care" "therapy" [19,] "systematic" "trial" [20,] "outcome" "controlled-trial" >