====== NBER US patents ======

http://patft.uspto.gov/netahtml/PTO/srchnum.htm

===== Obtaining titles of selected patents =====

# PatNames - reading patent titles from the US Patent Office
# http://patft.uspto.gov/netahtml/PTO/srchnum.htm
# Nataša Kejžar, April 9, 2004
# 'polished' by V.B., November 27, 2004
# -------------------------------------------------------------
# Prepare the list of patent numbers in the file numbers.txt,
# in the format of the attached file. PatNames writes the
# corresponding titles to the file titles.txt.
# !!! requires the XML package
# -------------------------------------------------------------

# Build the one-line description for a single entry of numbers.txt.
#
# num: one line of the input file; either a section marker containing
#      the text "island" or a patent number.
# Returns a single character string:
#   - "--- <num>" for a section marker,
#   - "<num> *** Full text is not available, see image version" when the
#     retrieved page contains the text "Images" in double quotes,
#   - "<num> *** Title not found" when the expected page elements are missing,
#   - "<num> <date> <title>" otherwise.
doloci.opis <- function(num) {
  # section markers are passed through with a "---" prefix
  if (grepl("island", num, fixed = TRUE)) {
    return(paste("---", num))
  }

  # first build the web address of the query for this patent number
  num.c <- as.character(num)
  url.s <- paste0(
    "http://patft.uspto.gov/netacgi/nph-Parser?",
    "Sect1=PTO1&Sect2=HITOFF&d=PALL&p=1&",
    "u=/netahtml/PTO/srchnum.htm&r=1&f=G&l=50&s1=", num.c,
    ".PN.&OS=PN/", num.c, "&RS=PN/", num.c
  )

  # fetch the page and parse it; warnings are muffled for this call only
  # (avoids 'There were 50 or more warnings ...') and, unlike a manual
  # options(warn = -1) / restore pair, nothing is left changed on error
  hp <- suppressWarnings(XML::htmlTreeParse(url.s))
  html.s <- unlist(hp$children$html$children)

  if (any(grepl('"Images"', html.s, fixed = TRUE))) {
    return(paste(
      num.c, "*** Full text is not available,",
      "see image version"
    ))
  }

  # the title is taken 4 elements after the 2nd element matching "hr"
  # NOTE(review): these offsets presumably mirror the page layout - confirm
  i <- grep("hr", html.s, fixed = TRUE)[2]
  if (is.na(i) || i + 4 > length(html.s)) {
    # fewer than two matches or a truncated page: do not emit "NA NA"
    return(paste(num.c, "*** Title not found"))
  }
  naslov <- unname(html.s[i + 4])
  # the date is the element immediately before the 2nd match
  datum <- unname(html.s[i - 1])
  paste(num.c, datum, naslov)
}

setwd("C:/Users/batagelj/Documents/manuscripts/Cite/R")
library(XML)
# read the patent numbers from the text file into num
num <- readLines("./numbers.txt")
# determine and write out the corresponding list of titles;
# vapply keeps the result a character vector even for an empty input file
writeLines(
  vapply(num, doloci.opis, character(1), USE.NAMES = FALSE),
  "./titles.txt"
)
island 3 5010649 4926557 4426780 4347666 4245454 4254550 4151646 4095338 4134204 4104796 4047299 4007525 3892037 3928911 3664102 3693255 3826068 island 1 4604106 4215999 4229186 4252102 island 2 3512876 3697150 4514044 5243451 --- island 3 5010649 ** Cutterhead for a vegetation cutter apparatus 4926557 ** Semi-automatic string trimmer head 4426780 January 24, 1984 Line metering apparatus 4347666 September 7, 1982 Apparatus for cutting vegetation 4245454 January 20, 1981 Line metering apparatus 4254550 March 10, 1981 Lawn trimmer 4151646 May 1, 1979 Flexible line trimmer with line feeding apparatus 4095338 June 20, 1978 Lawn trimmer equipped with flexible line cutting edges 4134204 January 16, 1979 Rotary flail cutter system 4104796 August 8, 1978 Filament-type trimmer 4047299 September 13, 1977 Apparatus for trimming vegetation 4007525 February 15, 1977 Spool for a vegetation cutting apparatus having a flexible line cutting element 3892037 July 1, 1975 Grass cutting, trimming and edging device 3928911 ** Hollow-shaft feed filament trimmer 3664102 May 23, 1972 CUTTER MECHANISM FOR AGRICULTURAL MACHINES OR LAWN MOWERS 3693255 September 26, 1972 LAWN EDGE TRIMMER 3826068 ** ROTARY CUTTING ASSEMBLY --- island 1 4604106 August 5, 1986 Composite polycrystalline diamond compact 4215999 August 5, 1980 Abrasive compact with a core of high rigidity material 4229186 October 21, 1980 Abrasive bodies 4252102 February 24, 1981 Cutting element for processing rocks, metal or the like --- island 2 3512876 *** Full text is not available, see image version 3697150 October 10, 1972 ELECTRO-OPTIC SYSTEMS IN WHICH AN ELECTROPHORETIC-LIKE OR DIPOLAR MATERIAL IS DISPERSED THROUGHOUT A LIQUID CRYSTAL TO REDUCE THE TURN-OFF TIME 4514044 April 30, 1985 1-(Trans-4-alkylcyclohexyl)-2-(trans-4'-(p-substituted phenyl) cyclohexyl)ethane and liquid crystal mixture 5243451 ** DAP type liquid crystal device with cholesteric liquid crystal birefringent layer ===== Word clouds ===== To produce the 
word cloud go to the Jason Davies' [[http://www.jasondavies.com/wordcloud/#|Word Cloud Generator]] and copy the text into the entry field area. The obtained word cloud save in SVG format (an option provided by the generator). Use InkScape to transform it into PDF format. Here is the word cloud for the main islands (Liquid crystal display) for US patents -1999.

{{notes:pics:lcd.svg}}

===== Titles / version 2017 =====

PatNames.R

# PatNames - reading patent titles from the US Patent Office
# http://patft.uspto.gov/netahtml/PTO/srchnum.htm
# Nataša Kejžar, April 9, 2004
# 'polished' by V.B., November 27, 2004
# 'updated' by V.B., March 21, 2017
# -------------------------------------------------------------
# Prepare the list of patent numbers in the file numbers.txt,
# in the format of the attached file stevilke.txt. PatNames
# writes the corresponding titles to the file titles.txt.
# !!! requires the XML package
# -------------------------------------------------------------

# Build the description of a single entry of numbers.txt.
#
# num: one line of the input file; either a section marker containing
#      the text "island" or a patent number.
# Returns a character vector:
#   - "--- <num>" for a section marker,
#   - c(num, "*** Full text is not available,", " see image version") when
#     the retrieved page contains the text "Images" in double quotes,
#   - c(num, "*** Title not found") when the expected page elements are
#     missing,
#   - c(num, <title>, <date>) otherwise.
# Progress is reported on the console for every entry.
doloci.opis <- function(num) {
  # section markers are passed through with a "---" prefix
  if (grepl("island", num, fixed = TRUE)) {
    cat("Island ", num, "\n")
    flush.console()
    return(paste("---", num))
  }

  # first build the web address of the query for this patent number
  a1 <- "http://patft.uspto.gov/netacgi/nph-Parser?"
  b1 <- "Sect1=PTO1&Sect2=HITOFF&d=PALL&p=1&u=%2Fnetahtml"
  c1 <- "%2FPTO%2Fsrchnum.htm&r=1&f=G&l=50&s1="
  a2 <- ".PN.&OS=PN/"
  a3 <- "&RS=PN/"
  num.c <- as.character(num)
  cat(num, "\n")
  flush.console()
  url.s <- paste0(a1, b1, c1, num.c, a2, num.c, a3, num.c)

  # fetch the page and parse it; warnings are muffled for this call only
  # (avoids 'There were 50 or more warnings ...') and, unlike a manual
  # options(warn = -1) / restore pair, nothing is left changed on error
  hp <- suppressWarnings(XML::htmlTreeParse(url.s))
  html.s <- unlist(hp$children$html$children)

  if (any(grepl('"Images"', html.s, fixed = TRUE))) {
    return(c(num, "*** Full text is not available,", " see image version"))
  }

  # the title is taken 4 elements after the 2nd element matching "hr"
  # NOTE(review): these offsets presumably mirror the page layout - confirm
  i <- grep("hr", html.s, fixed = TRUE)[2]
  if (is.na(i) || i + 4 > length(html.s)) {
    # fewer than two matches or a truncated page: do not emit NA entries
    return(c(num, "*** Title not found"))
  }
  naslov <- unname(html.s[i + 4])
  # the date is the element immediately before the 2nd match
  datum <- unname(html.s[i - 1])
  c(num, naslov, datum)
}

# library(XML)
# setwd("C:/Users/batagelj/work/R/patents")
# ## read the patent numbers from the text file into num
# num <- readLines("./numbers.txt")
# ## determine and write out the corresponding list of titles
# writeLines(unlist(sapply(num,doloci.opis)),"./titles.txt")
numbers.txt island 3 5010649 4926557 4426780 4347666 4245454 4254550 4151646 4095338 4134204 4104796 4047299 4007525 3892037 3928911 3664102 3693255 3826068 island 1 4604106 4215999 island 2 4229186 4252102