Skip to content

Instantly share code, notes, and snippets.

@aniemerg
Created June 10, 2012 01:10
Show Gist options
  • Save aniemerg/2903364 to your computer and use it in GitHub Desktop.
Save aniemerg/2903364 to your computer and use it in GitHub Desktop.
Counts various statistics of claims, such as total claims, number of independent claims, dependent claims
# ClaimCounts()
# Counts various statistics of patent claims: the total number of claims,
# the number of independent claims, and the number of dependent claims.
# The files needed to create the database can be found at:
# https://github.com/aniemerg/Patent-Tools
# Written on May 30, 2012 for a blog post.
import datetime
import math
import os
import re
import sys
from pprint import pprint

import MySQLdb as mdb
# Claim-number marker used to split a claims text: one digit, a period and a
# space (e.g. "1. ").  Compiled once instead of on every row.
_CLAIM_MARKER = re.compile(r"\d\.\ ")


def _count_claims(text):
    """Return ``(total, independent)`` claim counts for one claims text.

    The text is split on claim-number markers and the piece before the first
    marker is discarded.  A claim is counted as independent when its text
    does not contain the (case-sensitive) word ``claim``.  This is a
    heuristic: any digit followed by ". " inside a claim body also splits.

    A NULL (``None``) or empty text yields ``(0, 0)``.
    """
    if not text:
        return 0, 0
    claims = _CLAIM_MARKER.split(text)[1:]
    independent = sum(1 for claim in claims if 'claim' not in claim)
    return len(claims), independent


def _write_counts(path, counts):
    """Write a ``{claim_count: occurrences}`` dict to *path*, sorted by key.

    Each entry becomes one line of the form ``['<key>', <value>],``.
    """
    with open(path, 'w') as outfile:
        outfile.writelines(
            "['%s', %s],\n" % (key, count)
            for key, count in sorted(counts.items())
        )


def _fetch_claim_rows(limit):
    """Fetch up to *limit* rows of ``USPatents.claims`` from the database."""
    #mdb.connect('localhost', 'username', 'password', 'database');
    con = mdb.connect('localhost', 'root', 'password', 'database')
    # Close the cursor and the connection explicitly once the rows are read,
    # even if the query fails.
    try:
        cur = con.cursor()
        try:
            cur.execute(
                "SELECT USPatents.claims from USPatents LIMIT %d" % int(limit)
            )
            return cur.fetchall()
        finally:
            cur.close()
    finally:
        con.close()


def ClaimCounts(rows=None, output_dir="", limit=10000):
    """Tally claim counts and write three histogram files.

    For every row, the number of total, independent and dependent claims is
    computed, and for each of the three measures a histogram
    ``{number_of_claims: number_of_rows}`` is built.  The histograms are
    written to ``Total_Claim_Counts.csv``, ``Independent_Claim_Counts.csv``
    and ``Dependent_Claim_Counts.csv``.

    Args:
        rows: Optional iterable of row sequences whose first element is the
            claims text (or ``None``).  When omitted, rows are read from the
            ``USPatents`` table of the MySQL database.
        output_dir: Directory the three files are written to.  The default
            (empty string) writes to the current working directory.
        limit: Maximum number of rows fetched from the database; ignored
            when *rows* is supplied.

    Returns:
        None.  Results are written to the three files.
    """
    # Single-argument print calls behave the same under Python 2 and 3.
    print("Started Counting Claims at Time:")
    print(str(datetime.datetime.now()))

    if rows is None:
        rows = _fetch_claim_rows(limit)

    totalclaims = {}  # number of claims -> number of rows
    indclaims = {}    # number of independent claims -> number of rows
    depclaims = {}    # number of dependent claims -> number of rows

    for row in rows:
        total, independent = _count_claims(row[0])
        # Independent claims are a subset of all claims, so this is never
        # negative.
        dependent = total - independent
        totalclaims[total] = totalclaims.get(total, 0) + 1
        indclaims[independent] = indclaims.get(independent, 0) + 1
        depclaims[dependent] = depclaims.get(dependent, 0) + 1

    # Save each histogram to its own file.
    _write_counts(
        os.path.join(output_dir, 'Total_Claim_Counts.csv'), totalclaims)
    _write_counts(
        os.path.join(output_dir, 'Independent_Claim_Counts.csv'), indclaims)
    _write_counts(
        os.path.join(output_dir, 'Dependent_Claim_Counts.csv'), depclaims)
# Run the claim statistics only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    ClaimCounts()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment