Differences

This shows you the differences between two versions of the page.

Link to this comparison view

notes:net:uspt [2015/07/16 22:29]
vlado created
notes:net:uspt [2017/03/22 01:45] (current)
vlado [Word clouds]
Line 134: Line 134:
  
 {{notes:pics:lcd.svg}} {{notes:pics:lcd.svg}}
 +
 +===== Titles / version 2017 =====
 +
 +PatNames.R
 +<code R>
 +# PatNames - branje naslovov patentov z US Patent Office
 +# http://patft.uspto.gov/netahtml/PTO/srchnum.htm
 +#   Nataša Kejžar, 9. april 2004
 +#   'zlikal' V.B., 27. november 2004
 +#   'posodobil' V.B., 21. marec 2017
 +# -------------------------------------------------------------
 +# na datoteki numbers.txt pripravimo seznam številk patentov,
 +# v obliki s priložene datoteke stevilke.txt. Program PatNames
 +# bo na datoteko titles.txt izpisal ustrezne naslove.
 +# !!! pred uporabo je potrebno naložiti paket XML
 +# -------------------------------------------------------------
 +
 +doloci.opis <- function(num){
 +# najprej sestavimo naslov
 +  if(regexpr("island",num)>0){
 +    opis <- paste('---',num)
 +    cat("Island ",num,"\n"); flush.console()
 +  } else {
 +    a1 <- "http://patft.uspto.gov/netacgi/nph-Parser?"
 +    b1 <- "Sect1=PTO1&Sect2=HITOFF&d=PALL&p=1&u=%2Fnetahtml"
 +    c1 <- "%2FPTO%2Fsrchnum.htm&r=1&f=G&l=50&s1="
 +    a2 <- ".PN.&OS=PN/"
 +    a3 <- "&RS=PN/"
 +    num.c <- as.character(num)
 +    cat(num,"\n"); flush.console()
 +    url.s <- paste(a1,b1,c1,num.c,a2,num.c,a3,num.c,sep="")
 +
 +  # poberi tekočo stran s spleta in jo razčleni
 +  # prepreči sporočila 'There were 50 or more warnings  ...'
 +    op <- options(); options(warn=-1)
 +    hp <- htmlTreeParse(url.s); options(op)
 +    html.s <- unlist(hp$children$html$children)
 +
 +    if(any(regexpr('\"Images\"',html.s)>0)){
 +      opis <- c(num,'*** Full text is not available,',
 +        '    see image version')
 +    } else {
 +    # poiščemo točno določeno vrstico
 +    # 4 elemente za 2. značko <hr>
 +      i <- which(regexpr("hr",html.s)>0)[2]
 +      naslov <- html.s[i+4]
 +    # poiscemo še datum, predhodnik 2. značke <hr>
 +      datum <- html.s[i-1]
 +      names(naslov) <- NULL; names(datum) <- NULL
 +      opis <- c(num,naslov,datum)
 +    }
 +  }
 +  opis
 +}
 +
 +#  library(XML)
 +#  setwd("C:/Users/batagelj/work/R/patents")
 +#
 +## preberemo številke iz znakovne datoteke v num
 +#  num <- readLines("./numbers.txt")
 +#
 +## določi in izpiši ustrezni seznam naslovov
 +#  writeLines(unlist(sapply(num,doloci.opis)),"./titles.txt")
 +</code>
 +
 +numbers.txt
 +<code>
 +island 3
 +5010649
 +4926557
 +4426780
 +4347666
 +4245454
 +4254550
 +4151646
 +4095338
 +4134204
 +4104796
 +4047299
 +4007525
 +3892037
 +3928911
 +3664102
 +3693255
 +3826068
 +island 1
 +4604106
 +4215999
 +island 2
 +4229186
 +4252102
 +</code>
 +
  
notes/net/uspt.txt · Last modified: 2017/03/22 01:45 by vlado
 
Except where otherwise noted, content on this wiki is licensed under the following license: CC Attribution-Noncommercial-Share Alike 3.0 Unported
Recent changes RSS feed Donate Powered by PHP Valid XHTML 1.0 Valid CSS Driven by DokuWiki