Skip to content

Instantly share code, notes, and snippets.

@tonyb486
Created January 30, 2018 19:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tonyb486/9050807f9e09fe1ef15360d34c75eeba to your computer and use it in GitHub Desktop.
Save tonyb486/9050807f9e09fe1ef15360d34c75eeba to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import sys, csv, sqlite3
# Raise the csv module's per-field size cap as high as the platform allows.
# NOTE(review): presumably needed because some text columns exceed the
# default cap -- confirm against the input data.
# csv.field_size_limit() raises OverflowError when the value does not fit in
# a C long (e.g. sys.maxsize where long is 32-bit), so halve the requested
# limit until it is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 2

# Open (creating it if necessary) the SQLite database file "patent.db" in the
# current working directory; `cur` is the cursor used for all statements.
conn = sqlite3.connect("patent.db")
cur = conn.cursor()
# Create the ten-column `patent` table, then load it row by row from the
# tab-separated file "patent.tsv" in the current working directory.
cur.execute('''CREATE TABLE patent
(id varchar(20), type varchar(20), number varchar(20),
country varchar(20), date date, abstract text, title text,
kind varchar(10), num_claims int, filename varchar(120))''')
print("Adding patent table...")
# newline="" hands line-ending handling to the csv module, as its
# documentation requires, so newlines embedded in quoted fields survive.
# NOTE(review): the file is decoded with the locale's default encoding --
# confirm that matches the data, or pass encoding= explicitly.
with open("patent.tsv", newline="") as tsvfile:
    reader = csv.reader(tsvfile, delimiter="\t")
    # NOTE(review): row 0 is inserted like any other row; if the file starts
    # with a header line it will end up in the table -- confirm intended.
    for i, row in enumerate(reader):
        try:
            cur.execute("INSERT INTO patent VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", row)
        except sqlite3.Error:
            # Best effort: a row the database rejects (e.g. wrong number of
            # fields for the ten placeholders) is reported and skipped.
            # Only sqlite3 errors are caught so Ctrl-C and real bugs still
            # propagate.
            print("\nWARNING: Failure with row %d." % i)
        # Progress indicator, rewritten in place every 100000 rows.
        if i % 100000 == 0:
            sys.stdout.write("\rpatent row: %10d" % i)
            sys.stdout.flush()
# Persist all inserted rows in a single transaction.
conn.commit()
# Create the eight-column `cpc_current` table, then load it row by row from
# the tab-separated file "cpc_current.tsv" in the current working directory.
print("\nAdding cpc_current table...")
cur.execute('''CREATE TABLE cpc_current
(uuid varchar(36), patent_id varchar(20), section_id varchar(10),
subsection_id varchar(20), group_id varchar(20), subgroup_id varchar(20),
category varchar(20), sequence int)''')
# newline="" hands line-ending handling to the csv module, as its
# documentation requires, so newlines embedded in quoted fields survive.
# NOTE(review): the file is decoded with the locale's default encoding --
# confirm that matches the data, or pass encoding= explicitly.
with open("cpc_current.tsv", newline="") as tsvfile:
    reader = csv.reader(tsvfile, delimiter="\t")
    # NOTE(review): row 0 is inserted like any other row; if the file starts
    # with a header line it will end up in the table -- confirm intended.
    for i, row in enumerate(reader):
        try:
            cur.execute("INSERT INTO cpc_current VALUES (?, ?, ?, ?, ?, ?, ?, ?)", row)
        except sqlite3.Error:
            # Best effort: a row the database rejects (e.g. wrong number of
            # fields for the eight placeholders) is reported and skipped.
            # Only sqlite3 errors are caught so Ctrl-C and real bugs still
            # propagate.
            print("\nWARNING: Failure with row %d." % i)
        # Progress indicator, rewritten in place every 100000 rows.
        if i % 100000 == 0:
            sys.stdout.write("\rcpc_current row: %10d" % i)
            sys.stdout.flush()
# Persist all inserted rows, then release the database handle explicitly
# rather than relying on interpreter shutdown.
conn.commit()
conn.close()
print("\nComplete.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment