Keywords through time

Boundary problem

Nodes that remain (are kept): DC > 0, or DC = 0 and (Cite_indeg >= 3), or not remove

Constructing temporal network WKti and its slices

> py <- read.table(file="Yearb.clu",header=FALSE,skip=1,colClasses="character")$V1
> length(py)
[1] 45917
> n1 <- 45917; n2 <- 36275; n <- n1+n2; ma <- 371224
> nodes <- read.table(file="WKb.net",header=FALSE,skip=1,nrows=n,colClasses="character")
> dim(nodes)
[1] 82192     5
> head(nodes)
  V1                    V2     V3     V4     V5
1  1     CHALMERS_I(1989): 0.0000 0.0000 0.5000
2  2  CHALMERS_T(1981)2:31 0.0000 0.0000 0.5000
3  3  DETSKY_A(1992)45:255 0.0000 0.0000 0.5000
4  4     FEINSTEI_A(1987): 0.0000 0.0000 0.5000
5  5 FISHER_M(1994)272:143 0.0000 0.0000 0.5000
6  6  FLEISS_J(1991)44:127 0.0000 0.0000 0.5000
> arcs <- read.table(file="WKb.net",header=FALSE,skip=2+n,colClasses="character")
> dim(arcs)
[1] 371224      3
> head(arcs)
  V1    V2 V3
1 18 45918  1
2 18 45919  1
3 18 45920  1
4 18 45921  1
5 18 45922  1
6 18 45923  1
> tfirst <- 1900; tlast <- 2017
> net <- file("WKti.net","w")
> cat('*vertices',n,n1,'\n',sep=' ',file=net)
> for(v in 1:n1) cat(nodes$V1[v],' "',nodes$V2[v],'" [',py[v],']\n',sep='',file=net)
> for(v in 1:n2) cat(nodes$V1[n1+v],' "',nodes$V2[n1+v],'" [',tfirst,'-',tlast,']\n',sep='',file=net)
> cat('*arcs\n',file=net)
> m <- nrow(arcs)
> m
[1] 371224
> for(a in 1:m) cat(arcs$V1[a],arcs$V2[a],arcs$V3[a],'[',py[as.integer(arcs$V1[a])],']\n',file=net)
> close(net)

Analysis of the individual slices in Pajek.

Constructing frequency table in R

# Make the data directory current; the relative file names used further down
# (WKti.net, tot.clu, K1.net ... K7.net, F1.clu ... F7.clu, Keywords.Rdata)
# are resolved against it.
# NOTE(review): hard-coded, machine-specific Windows path - adjust it before
# running this script on another machine.
setwd("C:\\Users\\batagelj\\work\\Python\\WoS\\peere2\\keys")
# Look up the index stored under `key` in the environment `dict`.
# A key that is not yet present is first registered with the next free index,
# i.e. the current number of entries in `dict` plus one. In both cases the
# value stored under `key` is returned. Only `dict` itself is searched
# (inherits=FALSE), never its parent environments.
getInd <- function(key,dict){
  isKnown <- exists(key,envir=dict,inherits=FALSE)
  if(!isKnown) assign(key,length(dict)+1,envir=dict)
  get(key,envir=dict,inherits=FALSE)
}
# Collect the per-keyword values of one time slice.
#
# Arguments:
#   fnet  name of the slice network file. Its first line is split on white
#         space; field 2 is taken as the total number of vertices n and
#         field 3 as the size n1 of the first mode. The n2 = n - n1 vertex
#         lines following the first n1 vertex lines are read, and their
#         second column is used as the lookup key.
#   fclu  name of the file with one value per line; its first 1 + n1 lines
#         are skipped and the remaining values are converted to integers.
#
# Value:
#   an integer vector of length `nk`; positions that are never assigned stay 0.
#
# Depends on objects defined outside this function: the dictionary
# environment `K`, the vector length `nk`, and the helper getInd().
# Note that getInd() adds keys it has not seen before to `K`.
addTime <- function(fnet,fclu){
  net <- file(fnet,"r")
  # Release the connection on every exit path, also when a read below fails.
  on.exit(close(net),add=TRUE)
  header <- unlist(strsplit(readLines(net,n=1),"[[:space:]]+"))
  n <- as.integer(header[2]); n1 <- as.integer(header[3]); n2 <- n-n1
  # Fail early with a readable message instead of an obscure read.table error.
  if(length(n2) != 1 || is.na(n2) || n2 < 0)
    stop("cannot read the vertex counts from the first line of ",fnet,call.=FALSE)
  d <- integer(nk)
  # A slice without second-mode vertices contributes nothing.
  if(n2 == 0) return(d)
  S <- read.table(net,skip=n1,nrows=n2,fill=TRUE,colClasses=list("integer","character",
       "numeric","numeric","numeric","character"))
  ideg <- as.integer(read.table(file=fclu,header=FALSE,skip=1+n1,colClasses="character")$V1)
  # NOTE(review): the value is looked up at the dictionary index j but stored
  # at the slice position i. The two agree only if the slice lists the same
  # keys in the same order in which `K` was filled - confirm for the slice
  # files; otherwise `d[j] <- ideg[i]` may be what is intended.
  for(i in seq_len(n2)) {j <- getInd(S$V2[i],K); d[i] <- ideg[j]}
  return(d)
}

# Build the keyword frequency table A and store it in Keywords.Rdata.

# Header line of the temporal network: field 2 is the total number of
# vertices n, field 3 the size n1 of the first mode; nk = n - n1 is the
# number of remaining (second-mode) vertices.
net <- file("WKti.net","r")
LL <- unlist(strsplit(L <- readLines(net,n=1),"[[:space:]]+"))
n <- as.integer(LL[2]); n1 <- as.integer(LL[3]); nk <- n-n1
# Skip the n1 first-mode vertex lines and read the nk lines that follow;
# column V2 holds the vertex label. fill=TRUE pads lines with fewer fields.
S <- read.table(net,skip=n1,nrows=nk,fill=TRUE,colClasses=list("integer","character",
     "numeric","numeric","numeric","character"))
close(net)
# Values from tot.clu for the second-mode vertices (the header line and the
# n1 first-mode lines are skipped).
ideg <- as.integer(read.table(file="tot.clu",header=FALSE,skip=1+n1,colClasses="character")$V1)
# Dictionary label -> index; it is filled by the loop below and is read
# afterwards by addTime() as a global, so it must be built before those calls.
K <- new.env(hash=TRUE,parent=emptyenv())
# NOTE: `t` shares its name with base::t(); the name is kept because it
# becomes the column name `t` of the data frame A.
t <- integer(nk)
# Register every label in K and copy the value found at its index.
for(i in 1:nk) {j <- getInd(S$V2[i],K); t[i] <- ideg[j]}

# One vector per slice, from the slice network Kx.net and its file Fx.clu.
f1 <- addTime('K1.net','F1.clu')
f2 <- addTime('K2.net','F2.clu')
f3 <- addTime('K3.net','F3.clu')
f4 <- addTime('K4.net','F4.clu')
f5 <- addTime('K5.net','F5.clu')
f6 <- addTime('K6.net','F6.clu')
f7 <- addTime('K7.net','F7.clu')

# One row per label: the label K, the overall value t and the slice values f1..f7.
A <- data.frame(K=S$V2,t,f1,f2,f3,f4,f5,f6,f7)

save(A,file="Keywords.Rdata")

Analyzing frequency table

> setwd("C:\\Users\\batagelj\\work\\Python\\WoS\\peere2\\keys")
> load("Keywords.Rdata")
> head(A)
              K    t f1 f2 f3  f4  f5  f6  f7
1       control  930  1  7  7 125 156 175 459
2 meta-analysis  673  0  0  1  43  72 134 423
3     necessary   17  0  2  1   2   2   3   7
4     randomize 1193  0  0  6  73 146 330 638
5  metaanalysis  394  0  0  1  47  44  67 235
6        assess  250  0  2  6  29  27  64 122
> nk <- nrow(A)
> nk
[1] 36275
> Q <- A[2:9]
> s <- apply(Q,2,sum)
> s
     t     f1     f2     f3     f4     f5     f6     f7
371224    916   2257   4064  34511  42757  91495 195224
> for(i in 1:8) Q[i] <- nk*Q[i]/s[i]
> Q[1:20,]
             t         f1        f2         f3         f4          f5          f6          f7
1   90.8770715   39.60153 112.50554  62.481545 131.389267 132.3502584  69.3822067  85.2877976
2   65.7637303    0.00000   0.00000   8.925935  45.197908  61.0847347  53.1269468  78.5985586
3    1.6611938    0.00000  32.14444   8.925935   2.102228   1.6967982   1.1894093   1.3006854
4  116.5767165    0.00000   0.00000  53.555610  76.731332 123.8662675 130.8350183 118.5481806
5   38.5006088    0.00000   0.00000   8.925935  49.402364  37.3295601  26.5634734  43.6658659
6   24.4293203    0.00000  32.14444  53.555610  30.482310  22.9067755  25.3740642  22.6690878
7  141.3969059    0.00000  48.21666  53.555610 174.484947 207.8577777 124.8879720 132.2982830
8  116.0881301    0.00000  64.28888  80.333415 158.718235 150.1666394 123.3020930  99.5953366
9  203.7405313    0.00000  80.36110  62.481545 134.542610 166.2862221 186.7372534 237.4679855
10 107.1958575    0.00000  48.21666  44.629675 140.849294 105.2014875 116.1656375  99.9669610
11  72.9948091 3524.53603 128.57776 169.592766  83.038017  78.0527165  68.1927974  53.5139122
12  49.2495097   39.60153  32.14444  98.185285  62.015734  49.2071474  59.0739931  41.6219317
13  35.2759385    0.00000  16.07222   8.925935  33.635652  27.1487710  34.4928685  38.6489366
14   2.2474975    0.00000   0.00000  26.777805   8.408913   0.0000000   1.9823488   1.3006854
15 209.0172645  118.80459 225.01108 285.629921 352.123236 249.4293332 218.4548336 169.0890977
16  34.6896348    0.00000   0.00000   8.925935  32.584538  26.3003719  34.8893382  37.9056878
17   0.2931518    0.00000   0.00000   0.000000   1.051114   1.6967982   0.0000000   0.0000000
18   0.4885864    0.00000   0.00000   0.000000   1.051114   0.8483991   0.7929395   0.1858122
19   0.4885864    0.00000   0.00000   0.000000   1.051114   0.8483991   0.7929395   0.1858122
20  40.3572371   39.60153   0.00000  26.777805  53.606821  38.1779592  38.0610962  40.3212464
> nz <- apply(Q>0,1,sum)-1
> nz[1:20]
 [1] 7 5 6 5 5 6 6 6 6 6 7 7 6 4 7 5 2 4 4 6
> A$K[1:20]
 [1] control                meta-analysis          necessary              randomize              metaanalysis
 [6] assess                 trial                  clinical               health                 assessment
[11] report                 article                technology             blinding               quality
[16] pain                   pattern-recognition    electroencephalography augmentative           communication
> P <- Q[2:8]
> # for(i in 1:7) P[,i] <- (P[,i]/Q$t)*log(P[,i])
> head(P)
           f1        f2        f3         f4         f5         f6         f7
[1,] 39.60153 112.50554 62.481545 131.389267 132.350258  69.382207  85.287798
[2,]  0.00000   0.00000  8.925935  45.197908  61.084735  53.126947  78.598559
[3,]  0.00000  32.14444  8.925935   2.102228   1.696798   1.189409   1.300685
[4,]  0.00000   0.00000 53.555610  76.731332 123.866268 130.835018 118.548181
[5,]  0.00000   0.00000  8.925935  49.402364  37.329560  26.563473  43.665866
[6,]  0.00000  32.14444 53.555610  30.482310  22.906775  25.374064  22.669088
> P[is.nan(P)] <- 0
> head(P)
           f1        f2        f3         f4         f5         f6         f7
[1,] 39.60153 112.50554 62.481545 131.389267 132.350258  69.382207  85.287798
[2,]  0.00000   0.00000  8.925935  45.197908  61.084735  53.126947  78.598559
[3,]  0.00000  32.14444  8.925935   2.102228   1.696798   1.189409   1.300685
[4,]  0.00000   0.00000 53.555610  76.731332 123.866268 130.835018 118.548181
[5,]  0.00000   0.00000  8.925935  49.402364  37.329560  26.563473  43.665866
[6,]  0.00000  32.14444 53.555610  30.482310  22.906775  25.374064  22.669088
> Keys <- as.character(A$K)
> i1 <- order(P[,1],decreasing=TRUE)
> i2 <- order(P[,2],decreasing=TRUE)
> i3 <- order(P[,3],decreasing=TRUE)
> i4 <- order(P[,4],decreasing=TRUE)
> i5 <- order(P[,5],decreasing=TRUE)
> i6 <- order(P[,6],decreasing=TRUE)
> i7 <- order(P[,7],decreasing=TRUE)
> K1 <- Keys[i1]
> K2 <- Keys[i2]
> K3 <- Keys[i3]
> K4 <- Keys[i4]
> K5 <- Keys[i5]
> K6 <- Keys[i6]
> K7 <- Keys[i7]
> R <- cbind(K1,K2,K3,K4,K5,K6,K7)
> R[1:20,]
      K1               K2            K3            K4            K5            K6            K7
 [1,] "referee"        "review"      "peer-review" "review"      "review"      "review"      "review"
 [2,] "report"         "peer"        "referee"     "peer"        "peer"        "peer"        "systematic"
 [3,] "recommendation" "referee"     "journal"     "peer-review" "research"    "research"    "peer"
 [4,] "medical"        "peer-review" "review"      "quality"     "quality"     "quality"     "health"
 [5,] "subcommittee"   "care"        "peer"        "research"    "trial"       "health"      "research"
 [6,] "committee"      "reply"       "process"     "journal"     "journal"     "journal"     "quality"
 [7,] "act"            "quality"     "research"    "referee"     "health"      "systematic"  "management"
 [8,] "insurance"      "medical"     "editorial"   "medical"     "management"  "management"  "care"
 [9,] "subcommittee-b" "method"      "reviewer"    "care"        "referee"     "publication" "study"
[10,] "subcommittee-c" "role"        "quality"     "publication" "publication" "care"        "impact"
[11,] "bankruptcy"     "comment"     "medical"     "trial"       "clinical"    "treatment"   "use"
[12,] "subcommittee-d" "research"    "scientific"  "clinical"    "medical"     "patient"     "analysis"
[13,] "subcommittee-a" "use"         "science"     "management"  "analysis"    "analysis"    "trial"
[14,] "science"        "study"       "publication" "science"     "science"     "impact"      "literature"
[15,] "compensation"   "scientific"  "reply"       "assessment"  "care"        "randomize"   "patient"
[16,] "scientific"     "journal"     "editor"      "study"       "therapy"     "therapy"     "intervention"
[17,] "workman"        "impact"      "policy"      "therapy"     "control"     "trial"       "therapy"
[18,] "national"       "science"     "manuscript"  "health"      "disease"     "clinical"    "journal"
[19,] "review"         "process"     "report"      "analysis"    "patient"     "science"     "outcome"
[20,] "peer"           "experience"  "author"      "use"         "randomize"   "study"       "risk"
>
> index <- function(x,const) return (x*(1-x)**const)
> curve(index(x,3),from=0,to=1,n=50)
> curve(index(x,0.3),from=0,to=1,n=50)
> curve(index(x,0.1),from=0,to=1,n=50)

> P <- as.matrix(Q[2:8])
> for(i in 1:7) {maxp <- max(P[,i]); cat(i,'max=',maxp,'\n'); P[,i] <- (P[,i]/Q$t)*index(P[,i]/maxp,0.1)}
1 max= 7128.275
2 max= 3760.899
3 max= 5775.08
4 max= 797.7956
5 max= 739.804
6 max= 695.0115
7 max= 687.6909
> head(P)
              f1         f2           f3          f4          f5          f6          f7
[1,] 0.002419598 0.03692179 0.0074305125 0.233861404 0.255457253 0.075419288 0.114861787
[2,] 0.000000000 0.00000000 0.0002097470 0.038710225 0.076036062 0.061262934 0.134951716
[3,] 0.000000000 0.16524446 0.0083035149 0.003333751 0.002342198 0.001225112 0.001480637
[4,] 0.000000000 0.00000000 0.0042563320 0.062668635 0.174670521 0.206912393 0.172015451
[5,] 0.000000000 0.00000000 0.0003582735 0.078951438 0.048671289 0.026267438 0.071544162
[6,] 0.000000000 0.01123662 0.0203112164 0.047489853 0.028942356 0.037779959 0.030486507
> P[is.nan(P)] <- 0
> head(P)
              f1         f2           f3          f4          f5          f6          f7
[1,] 0.002419598 0.03692179 0.0074305125 0.233861404 0.255457253 0.075419288 0.114861787
[2,] 0.000000000 0.00000000 0.0002097470 0.038710225 0.076036062 0.061262934 0.134951716
[3,] 0.000000000 0.16524446 0.0083035149 0.003333751 0.002342198 0.001225112 0.001480637
[4,] 0.000000000 0.00000000 0.0042563320 0.062668635 0.174670521 0.206912393 0.172015451
[5,] 0.000000000 0.00000000 0.0003582735 0.078951438 0.048671289 0.026267438 0.071544162
[6,] 0.000000000 0.01123662 0.0203112164 0.047489853 0.028942356 0.037779959 0.030486507
> Keys <- as.character(A$K)
> i1 <- order(P[,1],decreasing=TRUE)
> i2 <- order(P[,2],decreasing=TRUE)
> i3 <- order(P[,3],decreasing=TRUE)
> i4 <- order(P[,4],decreasing=TRUE)
> i5 <- order(P[,5],decreasing=TRUE)
> i6 <- order(P[,6],decreasing=TRUE)
> i7 <- order(P[,7],decreasing=TRUE)
> K1 <- Keys[i1]
> K2 <- Keys[i2]
> K3 <- Keys[i3]
> K4 <- Keys[i4]
> K5 <- Keys[i5]
> K6 <- Keys[i6]
> K7 <- Keys[i7]
> R <- cbind(K1,K2,K3,K4,K5,K6,K7)
> R[1:20,]
      K1               K2                                            K3                 K4            K5
 [1,] "subcommittee"   "peer"                                        "referee"          "peer-review" "peer"
 [2,] "recommendation" "referee"                                     "reply"            "peer"        "research"
 [3,] "subcommittee-b" "right-of-access"                             "weigh"            "quality"     "trial"
 [4,] "subcommittee-c" "psro"                                        "nsf"              "journal"     "quality"
 [5,] "report"         "peer-review"                                 "balance"          "referee"     "journal"
 [6,] "subcommittee-d" "rx"                                          "editorial"        "medical"     "medical"
 [7,] "subcommittee-a" "optional"                                    "ceci"             "research"    "clinical"
 [8,] "bankruptcy"     "anonymous"                                   "racism"           "publication" "control"
 [9,] "workman"        "national-institutes-of-health"               "patrick"          "care"        "referee"
[10,] "jurisdiction"   "continuing-education"                        "antitrust"        "assurance"   "publication"
[11,] "goodsir"        "jr."                                         "academic-freedom" "audit"       "science"
[12,] "paget"          "rubin"                                       "peter"            "clinical"    "management"
[13,] "nystagmus"      "cole"                                        "difficult"        "trial"       "analysis"
[14,] "1906"           "american-society-for-clinical-investigation" "anonymous"        "physician"   "disease"
[15,] "malinger"       "adamha"                                      "process"          "control"     "therapy"
[16,] "insurance"      "child-psychiatry"                            "antitrust-law"    "reliability" "health"
[17,] "james"          "problem-oriented"                            "risk-benefits"    "process"     "randomize"
[18,] "miner"          "nisbet"                                      "exclusionary"     "assessment"  "patient"
[19,] "sir"            "royal-australasian-college-of-radiologists"  "tabloid"          "evaluation"  "new"
[20,] "act"            "community-hospital"                          "harnad"           "manuscript"  "care"
      K6            K7
 [1,] "peer"        "systematic"
 [2,] "research"    "health"
 [3,] "quality"     "research"
 [4,] "journal"     "study"
 [5,] "health"      "management"
 [6,] "treatment"   "care"
 [7,] "publication" "peer"
 [8,] "randomize"   "intervention"
 [9,] "management"  "impact"
[10,] "patient"     "protocol"
[11,] "therapy"     "use"
[12,] "impact"      "literature"
[13,] "clinical"    "quality"
[14,] "analysis"    "outcome"
[15,] "assessment"  "analysis"
[16,] "systematic"  "patient"
[17,] "medical"     "risk"
[18,] "science"     "trial"
[19,] "care"        "therapy"
[20,] "outcome"     "randomize"
>
> P <- as.matrix(Q[2:8])
> for(i in 1:7) {maxp <- max(P[,i]); cat(i,'max=',maxp,'\n'); P[,i] <- (P[,i]/Q$t)*index(P[,i]/maxp,0.3)}
...
> R[1:20,]
      K1               K2                                            K3                 K4            K5
 [1,] "subcommittee"   "referee"                                     "referee"          "peer-review" "peer"
 [2,] "recommendation" "right-of-access"                             "weigh"            "peer"        "research"
 [3,] "subcommittee-b" "psro"                                        "reply"            "quality"     "trial"
 [4,] "subcommittee-c" "peer"                                        "nsf"              "journal"     "quality"
 [5,] "subcommittee-d" "peer-review"                                 "balance"          "referee"     "journal"
 [6,] "report"         "rx"                                          "ceci"             "medical"     "medical"
 [7,] "subcommittee-a" "optional"                                    "editorial"        "research"    "control"
 [8,] "bankruptcy"     "anonymous"                                   "racism"           "publication" "clinical"
 [9,] "workman"        "national-institutes-of-health"               "patrick"          "assurance"   "referee"
[10,] "jurisdiction"   "continuing-education"                        "antitrust"        "care"        "publication"
[11,] "goodsir"        "jr."                                         "academic-freedom" "audit"       "science"
[12,] "paget"          "rubin"                                       "peter"            "clinical"    "management"
[13,] "nystagmus"      "cole"                                        "difficult"        "trial"       "analysis"
[14,] "1906"           "american-society-for-clinical-investigation" "anonymous"        "physician"   "disease"
[15,] "malinger"       "adamha"                                      "antitrust-law"    "reliability" "therapy"
[16,] "insurance"      "child-psychiatry"                            "risk-benefits"    "control"     "health"
[17,] "james"          "problem-oriented"                            "exclusionary"     "process"     "randomize"
[18,] "miner"          "nisbet"                                      "tabloid"          "assessment"  "new"
[19,] "sir"            "royal-australasian-college-of-radiologists"  "harnad"           "evaluation"  "patient"
[20,] "act"            "community-hospital"                          "ceprime"          "manuscript"  "care"
      K6            K7
 [1,] "peer"        "systematic"
 [2,] "research"    "health"
 [3,] "quality"     "research"
 [4,] "journal"     "study"
 [5,] "health"      "management"
 [6,] "treatment"   "care"
 [7,] "publication" "intervention"
 [8,] "randomize"   "protocol"
 [9,] "management"  "peer"
[10,] "patient"     "impact"
[11,] "therapy"     "use"
[12,] "impact"      "literature"
[13,] "clinical"    "quality"
[14,] "analysis"    "outcome"
[15,] "assessment"  "analysis"
[16,] "medical"     "patient"
[17,] "science"     "risk"
[18,] "systematic"  "therapy"
[19,] "care"        "trial"
[20,] "outcome"     "controlled-trial"
>
> P <- as.matrix(Q[2:8])
> for(i in 1:7) {maxp <- max(P[,i]); cat(i,'max=',maxp,'\n'); P[,i] <- (P[,i]/Q$t)*index(P[,i]/maxp,0.5)}
...
> R[1:20,]
      K1               K2                                            K3                 K4            K5           
 [1,] "subcommittee"   "right-of-access"                             "referee"          "peer-review" "peer"       
 [2,] "subcommittee-b" "referee"                                     "weigh"            "peer"        "trial"      
 [3,] "subcommittee-c" "psro"                                        "reply"            "quality"     "research"   
 [4,] "recommendation" "peer-review"                                 "nsf"              "journal"     "quality"    
 [5,] "subcommittee-d" "rx"                                          "balance"          "referee"     "journal"    
 [6,] "subcommittee-a" "optional"                                    "ceci"             "medical"     "medical"    
 [7,] "report"         "anonymous"                                   "racism"           "research"    "control"    
 [8,] "bankruptcy"     "national-institutes-of-health"               "editorial"        "assurance"   "clinical"   
 [9,] "workman"        "continuing-education"                        "patrick"          "publication" "referee"    
[10,] "jurisdiction"   "peer"                                        "antitrust"        "audit"       "publication"
[11,] "goodsir"        "jr."                                         "academic-freedom" "care"        "science"    
[12,] "paget"          "rubin"                                       "peter"            "clinical"    "management" 
[13,] "nystagmus"      "cole"                                        "difficult"        "physician"   "analysis"   
[14,] "1906"           "american-society-for-clinical-investigation" "anonymous"        "trial"       "disease"    
[15,] "malinger"       "adamha"                                      "antitrust-law"    "reliability" "therapy"    
[16,] "insurance"      "child-psychiatry"                            "risk-benefits"    "control"     "randomize"  
[17,] "james"          "problem-oriented"                            "exclusionary"     "process"     "health"     
[18,] "miner"          "nisbet"                                      "tabloid"          "assessment"  "new"        
[19,] "sir"            "royal-australasian-college-of-radiologists"  "harnad"           "evaluation"  "patient"    
[20,] "act"            "community-hospital"                          "ceprime"          "manuscript"  "literature" 
      K6            K7                
 [1,] "peer"        "systematic"      
 [2,] "research"    "health"          
 [3,] "quality"     "research"        
 [4,] "journal"     "study"           
 [5,] "treatment"   "management"      
 [6,] "health"      "care"            
 [7,] "publication" "intervention"    
 [8,] "randomize"   "protocol"        
 [9,] "management"  "impact"          
[10,] "patient"     "peer"            
[11,] "therapy"     "use"             
[12,] "clinical"    "literature"      
[13,] "impact"      "outcome"         
[14,] "analysis"    "analysis"        
[15,] "assessment"  "quality"         
[16,] "medical"     "patient"         
[17,] "science"     "risk"            
[18,] "care"        "therapy"         
[19,] "systematic"  "trial"           
[20,] "outcome"     "controlled-trial"
> 
notes/net/keytim.txt · Last modified: 2016/03/21 05:29 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki