Skip to content

Instantly share code, notes, and snippets.

@yanofsky
Created December 14, 2012 15:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yanofsky/4286461 to your computer and use it in GitHub Desktop.
Save yanofsky/4286461 to your computer and use it in GitHub Desktop.
The way I calculated the amount of printing, and number of lines, on every Form 1040 ever
import glob
from PIL import Image
import json
def _black_pixel_count(colors):
    """Return the pixel count recorded for grey level 0 (pure black).

    ``colors`` is the value returned by ``amount_of_black``: a list of
    ``(count, level)`` pairs.  The pairs are searched for level 0 instead of
    trusting the first entry, so a page without any pure-black pixel yields 0
    rather than the count of whatever colour happens to be listed first.
    ``None`` or an empty list also yields 0.
    """
    for count, level in colors or ():
        if level == 0:
            return count
    return 0


def calculate_black_stats():
    """Tally black pixels per page and per year for every ``png/*.png`` file.

    For each file the year is the second ``__``-separated field of the text
    before ``.pdf`` and the page index is the integer between the first ``-``
    and ``.png`` (presumably names shaped like ``<prefix>__<year>.pdf-<page>.png``;
    verify against the real file names).

    Progress is printed to stdout, and two files are written to the current
    directory:

    * ``outdataCrop.json`` -- ``{year: {"pages": [...], "total": n, "size": n}}``
    * ``outdataCrop.csv``  -- one row per year: year, total pixel area, total
      black pixels, black/area ratio, then one black-pixel count per page slot.

    Raises:
        IndexError: if a file name lacks the ``__`` / ``-`` separators or its
            page index does not fit in the 13 page slots.
        ValueError: if the page field is not an integer.
    """
    # Number of per-page slots kept for each year; the CSV header is derived
    # from the same value so header and rows always have matching widths.
    page_slots = 13

    data = {}
    # Sorted so the progress output is reproducible between runs.
    for path in sorted(glob.glob("png/*.png")):
        year = path.split(".pdf")[0].split("__")[1]
        page = int(path.split("-")[1].split(".png")[0])
        if year not in data:
            data[year] = {"pages": [0] * page_slots, "total": 0, "size": 0}

        image = Image.open(path)
        black = _black_pixel_count(amount_of_black(image))
        data[year]["pages"][page] = black
        width, height = image.size
        data[year]["size"] += width * height
        print("%s %s %s" % (year, page, black))

    for stats in data.values():
        stats["total"] = sum(stats["pages"])

    print("*******************\n\n\n\n")
    years = sorted(data)
    for year in years:
        print("%s %s" % (year, data[year]["total"]))

    with open("outdataCrop.json", "w") as out:
        out.write(json.dumps(data, indent=4))

    header = ["year", "size", "total", "ratio"]
    header += [str(slot) for slot in range(page_slots)]
    with open("outdataCrop.csv", "w") as out:
        out.write(",".join(header) + "\n")
        for year in years:
            stats = data[year]
            # float() forces true division; plain "/" on two ints truncates
            # to 0 under Python 2.
            if stats["size"]:
                ratio = float(stats["total"]) / stats["size"]
            else:
                ratio = 0.0
            fields = [year, stats["size"], stats["total"], ratio]
            fields += stats["pages"]
            out.write(",".join(str(field) for field in fields) + "\n")
def amount_of_black(image):
    """Return the colour tally of ``image`` after converting it to mode "L".

    The image is converted with ``convert("L")`` and the result of calling
    ``getcolors()`` on the converted image is returned unchanged.
    NOTE(review): for a PIL image this is presumably a list of
    ``(count, grey level)`` pairs -- confirm against the Pillow docs.
    """
    return image.convert("L").getcolors()
# Script entry point: run the black-pixel tally as soon as this file executes.
calculate_black_stats()
import glob
from PIL import Image
import json
def interactive_line_count():
    """Show every ``png/*.png`` image and collect a per-year line count.

    For each file the year is the second ``__``-separated field of the text
    before ``.pdf``.  The image is displayed with ``show()`` and
    ``ask_for_data`` is called with the year's running total; whatever it
    returns becomes the new total.

    When all files have been handled the totals are written to
    ``numlines.json`` and ``numlines.csv`` in the current directory.

    Raises:
        IndexError: if a file name contains no ``__`` separator.
    """
    data = {}
    # Sorted so images are presented in a predictable, filename-grouped order
    # instead of whatever order the file system returns.
    for path in sorted(glob.glob("png/*.png")):
        year = path.split(".pdf")[0].split("__")[1]
        data.setdefault(year, 0)

        Image.open(path).show()
        data[year] = ask_for_data(data[year])
        print("%s now at: %s" % (year, data[year]))

    with open("numlines.json", "w") as out:
        out.write(json.dumps(data, indent=4))

    with open("numlines.csv", "w") as out:
        out.write("year,number of lines\n")
        for year in sorted(data):
            out.write("%s,%s\n" % (year, data[year]))
def ask_for_data(val, read_line=None):
    """Prompt for a line-count command and return the updated running total.

    Accepted answers (leading/trailing whitespace is ignored):

    * ``r<int>`` -- replace the total with ``<int>``
    * ``a<int>`` -- add ``<int>`` to the total
    * ``s...``   -- skip: return ``val`` unchanged

    Anything else, including ``r``/``a`` without a valid integer, prints
    ``ERROR, try again`` and asks again instead of raising, so one typo cannot
    abort an interactive session.

    Args:
        val: the current running total.
        read_line: optional callable taking the prompt string and returning
            the user's answer.  Defaults to ``raw_input`` on Python 2 and
            ``input`` on Python 3.

    Returns:
        The new running total.
    """
    if read_line is None:
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3

    while True:
        answer = read_line("How Many Lines? ").strip()
        # The command is the first character only; the rest is its argument.
        command, argument = answer[:1], answer[1:]

        if command == "s":
            return val

        if command in ("r", "a"):
            try:
                amount = int(argument)
            except ValueError:
                amount = None
            if amount is not None:
                return amount if command == "r" else val + amount

        print("ERROR, try again")
# Script entry point: start the interactive counting session when this file executes.
interactive_line_count()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment