Skip to content

Instantly share code, notes, and snippets.

@hendrixjoseph
Last active July 28, 2020 20:40
Show Gist options
  • Save hendrixjoseph/29e4b9b9b61d3a4ba4bd7a80aa111764 to your computer and use it in GitHub Desktop.
Save hendrixjoseph/29e4b9b9b61d3a4ba4bd7a80aa111764 to your computer and use it in GitHub Desktop.
package com.joehxblog;
import java.io.FileWriter;
import java.io.IOException;
import java.util.function.Consumer;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Samples Google Patents pages for evenly spaced US patent numbers and records
 * the date found in each page's {@code itemprop="publicationDate"} element.
 *
 * <p>Each sampled patent produces one CSV row of the form
 * {@code count,year,month,day}. Rows are written to {@code patents.csv} in the
 * working directory and echoed to standard output.
 */
public class PatentCounter {

    /** URL prefix of a US patent page; the patent number is appended to it. */
    public static final String URL = "https://patents.google.com/patent/US";

    /**
     * Requests every 97,355th patent number from 1 through 10,709,051 and
     * writes one CSV row per page that carries a publication date.
     *
     * <p>A page that answers with an HTTP error status, or that has no
     * publication-date element, is reported on standard output and skipped.
     * Any other {@link IOException} (network failure, failure to write the
     * CSV file) ends the run and prints the stack trace.
     *
     * @param args ignored
     */
    public static void main(final String... args) {
        try (FileWriter writer = new FileWriter("patents.csv")) {
            writer.write("count,year,month,day" + System.lineSeparator());

            final int start = 1;
            final int end = 10_709_051;
            final int increment = 97_355;

            // A plain for loop is used rather than an IntStream because the
            // body throws checked IOExceptions.
            for (int i = start; i <= end; i += increment) {
                try {
                    final Document doc = Jsoup.connect(URL + i).get();

                    // <time itemprop="publicationDate" datetime="1836-07-13">1836-07-13</time>
                    final Elements granted = doc.getElementsByAttributeValue("itemprop", "publicationDate");

                    if (granted.isEmpty()) {
                        // Without this check the row could not be built and
                        // the whole run would die on a NullPointerException.
                        System.out.println("Patent " + i + " has no publication date.");
                        continue;
                    }

                    // Build the row once, then send the same text to both
                    // destinations. Writing to the file directly (instead of
                    // through a lambda that swallows IOException) lets a
                    // broken output file stop the run via the outer catch.
                    final StringBuilder row = new StringBuilder();
                    write(row::append, i, granted);
                    final String line = row.toString();

                    writer.write(line);
                    System.out.print(line);
                } catch (final HttpStatusException e) {
                    System.out.println("Patent " + i + " not found.");
                }
            }
        } catch (final IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Emits one CSV row for a patent through the given sink, in three pieces:
     * the patent number followed by a comma, the text of the first matched
     * element with every {@code '-'} replaced by {@code ','}, and the platform
     * line separator.
     *
     * <p>If {@code granted} is empty nothing is emitted, so no partial row is
     * ever produced.
     *
     * @param writer  sink that receives each piece of the row
     * @param i       patent number, written as the first column
     * @param granted elements matched for the publication date; only the first
     *                one is used
     */
    public static void write(final Consumer<String> writer, final int i, final Elements granted) {
        // Elements.first() yields null for an empty match; bail out before
        // anything has been emitted.
        if (granted.isEmpty()) {
            return;
        }

        writer.accept(i + ",");
        writer.accept(granted.first().text().replace('-', ','));
        writer.accept(System.lineSeparator());
    }
}
<!-- Maven build for the Patent Counter tool; its only dependency is jsoup. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.joehxblog</groupId>
  <artifactId>patent.counter</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>Patent Counter</name>

  <properties>
    <!-- PatentCounter uses lambdas and method references, which need Java 8
         or newer. Pin the level explicitly instead of relying on the
         compiler plugin's default. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Avoids the platform-dependent encoding warning during the build. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- HTML fetching and parsing (Jsoup.connect, Document, Elements). -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.13.1</version>
    </dependency>
  </dependencies>
</project>
@hendrixjoseph
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment