Create a gist now

Instantly share code, notes, and snippets.

@amandabee /README.md
Last active Aug 29, 2015

A handful of pretty random python scripts.

Just ... some scripts. Nothing special.

""" Generate PDF bingo cards."""
import os
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.lib.colors import Color
import random
""" TO DO: use better fonts.
from reportlab.pdfbase.ttfonts import TTFont
Then can register TTF with
pdfmetrics.registerFont(TTFont('Vera','Vera.ttf'))
"""
def set_canvas(filename):
    """Create and return a letter-sized reportlab canvas for *filename*."""
    pdf_canvas = canvas.Canvas(filename)
    pdf_canvas.setPageSize(letter)
    # Fonts are set in the draw routines, not here: easier per string.
    return pdf_canvas
def get_box_width(word="bingo", page_size=letter, page_margin=1 * inch):
    """For any word and page dimensions, return the box width that lets
    len(word) boxes accommodate the word and fill the printable width
    (page width minus a margin on each side).
    !!! FIX: If I was clever, I'd set the base font size here, too.
    """
    usable_width = page_size[0] - 2 * page_margin
    return usable_width / len(word)
def get_coords(word="bingo"):
    """Return (x_list, y_list, coords) for *word*.

    coords -- one (x, y) tuple per cell, column by column, where each
              string gets drawn; len(word) columns of len(word) + 1 cells
              (headers plus numbers).
    x_list -- x positions for the grid's vertical lines.
    y_list -- y positions for the grid's horizontal lines.
    """
    box = get_box_width(word)
    cols = len(word)
    # Centre the grid on the page: the first letter is drawn half a box
    # in from the gutter.
    first_x = 0.5 * box + (letter[0] - box * cols) / 2
    y_offset = 0.5 * box + (letter[1] - box * (cols + 1)) / 2
    first_y = 11 * inch - y_offset
    # One x per column; one extra y row for the column headers.
    x_values = [first_x + box * n for n in range(cols)]
    y_values = [first_y - box * n for n in range(cols + 1)]
    coords = [(x, y) for x in x_values for y in y_values]
    # Grid lines sit half a box outside the string anchor points.
    x_list = [int(x - box * 0.5) for x in x_values]
    x_list.append(int(x_values[-1] + box * 0.5))
    y_list = [int(y + box * 0.65) for y in y_values]
    # This math is cheating: empirically the bottom line was off by 13.
    y_list.append(int(y_values[-1] - box * 0.5 + 13))
    return x_list, y_list, coords
def set_ranges(word="bingo", i=15):
    """Map each letter of *word* to its [min, max] number range.

    Ranges are consecutive blocks of *i* integers: the first letter gets
    [1, i], the second [i + 1, 2 * i], and so on.  Defaults to "bingo"
    with an interval of 15.  The dict also carries the starting word
    under the key 'word'.
    """
    ranges = {'word': word}
    for position, ltr in enumerate(word):
        low = position * i + 1
        ranges[ltr] = [low, low + i - 1]
    return ranges
def set_strings(ranges):
"""Takes a dictionary produced by set_ranges() and generates a list of
random numbers for each letter in the range. How many random numbers depends
on how longthe base word is.
"""
# Get the word we're working with (probably "bingo")
word = ranges['word']
# How long is the word?
length = len(word)
strings = []
# For each letter in the word, pick length random numbers in the range.
for ltr in word:
strings.append(str.upper(ltr))
random_numbers = random.sample(
range(ranges[ltr][0], ranges[ltr][1] + 1), length)
for i in random_numbers:
strings.append(i)
# Put a free cell in the middle, but only if word length is odd
if len(word) % 2 == 0:
print "No free cell"
else:
mid = len(strings) / 2
strings[mid] = "FREE"
return strings
def draw_grid(this_canvas, coords):
    """Draw the card grid on *this_canvas* in 50%-transparent red.

    *coords* is the (x_list, y_list, cell_coords) triple returned by
    get_coords(); only the first two members are used here.
    """
    this_canvas.setLineWidth(2.0)
    # reportlab Color components are floats in 0-1; the original passed
    # 100 for red, which is out of range (viewers clamp it to full red),
    # so 1 preserves the rendered colour while emitting a valid value.
    red50transparent = Color(1, 0, 0, alpha=0.5)
    this_canvas.setStrokeColor(red50transparent)
    x_list = coords[0]
    y_list = coords[1]
    this_canvas.grid(x_list, y_list)
def draw_strings(this_canvas, coords, strings):
    """Draw each entry of *strings* at the matching (x, y) in *coords*.

    Assumes *strings* mixes str column headers and int cell values:
    headers get the big Courier face, numbers a bold Helvetica, and the
    "FREE" cell a smaller font so it fits its box.
    !!! FIX: If the boxes are too small, the font should get reduced.
    """
    for index, cell in enumerate(strings):
        if cell == "FREE":
            # The "FREE" cell needs a smaller font to fit.
            this_canvas.setFont('Helvetica', 28)
        elif isinstance(cell, str):
            this_canvas.setFont('Courier-Bold', 42)
        else:
            this_canvas.setFont('Helvetica-Bold', 36)
        x, y = coords[index]
        this_canvas.drawCentredString(x, y, str(cell))
def draw_cards(path, filename="bingo.pdf", i=45, word="bingo"):
    """Take a path, a filename, some integer (i), some word.  Draw i
    bingo cards, one per page, at path/filename.
    """
    # expanduser so the documented draw_cards("~", ...) call works:
    # os.chdir does not expand "~" itself and would raise OSError.
    os.chdir(os.path.expanduser(path))
    # The canvas, ranges and coordinates only need to be set once.
    this_canvas = set_canvas(filename)
    this_ranges = set_ranges(word)
    this_coordinates = get_coords(word)
    this_coord_tuples = this_coordinates[2]
    # Generate new random strings for each card.
    for _unused in range(0, i):
        card_strings = set_strings(this_ranges)
        draw_strings(this_canvas, this_coord_tuples, card_strings)
        draw_grid(this_canvas, this_coordinates)
        # Close out the page: without this every card (and the grid,
        # previously drawn only once) piles up on a single page.
        this_canvas.showPage()
    this_canvas.save()
"""
draw_cards("~", "bingo.pdf", 13, "bingo")
"""
"""
Feb 2014: I started trying to scrape this w/Beautiful Soup, but it turns out the data is all in JSON.
Extract all facility addresses from http://www.bop.gov/locations/list.jsp
"""
import urllib2
import json
import csv
# The BOP locations endpoint returns the whole facility list as JSON.
url = "http://www.bop.gov/PublicInfo/execute/locations?todo=query&output=json"
json_string = urllib2.urlopen(url).read()
## Load the string of JSON into a dict
jsondata = json.loads(json_string)
## Review the keys of the dict
## or just use http://jsbeautifier.org/ to see what it looks like
for item in jsondata:
    print item
## So I know there are three top level items
## Get the full list of items in "Locations"
# Print the field names of the first facility record to see the schema.
for item in jsondata['Locations'][0]:
    print item
### Open a CSV WRiter
# 'wb' because this is Python 2's csv module.
f=csv.writer(open('/tmp/locations.csv','wb'))
###and write to it.
# and write to it.
# One CSV row per facility; field order here fixes the column order.
# NOTE(review): no header row is written — the columns follow the
# key order below.
for item in jsondata['Locations']:
    f.writerow(
        [item['hasFsl'],
         item['code'],
         item['contactEmail'],
         item['special'],
         item['city'],
         item['privateFacl'],
         item['nameDisplay'],
         item['faclTypeDescription'],
         item['state'],
         item['phoneNumber'],
         item['latitude'],
         item['type'],
         item['locationtype'],
         item['zipCode'],
         item['hasCamp'],
         item['complexCode'],
         item['address'],
         item['securityLevel'],
         item['name'],
         item['gender'],
         item['region'],
         item['longitude'],
         item['hasFdc'],
         item['timeZone'],
         item['nameTitle']])
#!/usr/bin/env python
### Jan 2014
### This is rough, but works. I wanted a spreadsheet of MLB Salaries for a
### basic lesson on means and medians and how wildly extravagant salaries
### distort the mean. So I scraped the data from Newsday's salary database.
#import scraperwiki
import urllib2
from bs4 import BeautifulSoup
import csv
def get_soup(url):
    """Fetch *url* and return the page parsed as a BeautifulSoup tree."""
    return BeautifulSoup(urllib2.urlopen(url))
def get_salaries(soup, linewriter):
    """Scrape one results page and write each player row via *linewriter*.

    *soup* is the parsed results page; *linewriter* is a csv.DictWriter
    whose field names match the keys built below.  Rows without the
    expected 8 <td> cells (e.g. the header row) raise IndexError, which
    is caught and printed rather than saved.
    """
    # The salary table on the page has id "sdb-results".
    table = soup.find("table", {"id":"sdb-results"})
    for row in table.findAll('tr'):
        cells = row.find_all("td")
        try:
            data = {
                'player' : cells[0].get_text().strip(),
                'team' : cells[1].get_text().strip(),
                'position' : cells[2].get_text().strip(),
                'state' : cells[3].get_text().strip(),
                'league' : cells[4].get_text().strip(),
                'division' : cells[5].get_text().strip(),
                # Strip the leading "$" and commas, then any whitespace.
                '2013_salary' : cells[6].get_text().strip('$,').strip(),
                'age' : cells[7].get_text().strip()
            }
            #scraperwiki.sqlite.save(unique_keys=['player'],data=data)
            linewriter.writerow(data)
            print "Saved " + data['player']
        except Exception,e:
            # Best-effort: log the bad row and keep scraping.
            print str(e)
# Newsday paginates 50 records per page via the currentRecord parameter.
base_url = "http://data.newsday.com/long-island/data/baseball/mlb-salaries-2013/?currentRecord="
print range(1, 854, 50)
# 'a+' appends, so re-runs add to the existing CSV rather than replace it.
with open('/home/amanda/Desktop/mlb_salaries_alt.csv', 'a+') as csvfile:
    # Column order must match the dict keys built in get_salaries().
    fieldorder = ['player' , 'team' , 'position' , 'state' ,
                  'league', 'division', '2013_salary', 'age']
    linewriter = csv.DictWriter(csvfile, fieldorder, delimiter='|',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # Walk the pagination: records 1, 51, 101, ... up to 853.
    for record in range(1, 854, 50):
        print "starting..."
        url = base_url + str(record);
        soup = get_soup(url)
        get_salaries(soup, linewriter)
        print url;
"""
Pull down a bunch of public mailman archives.
In this case, they're archived monthly and the URLs look something like this:
http://lists.example.net/pipermail/listname/2010-October.txt.gz
"""
import urllib2
import calendar
import gzip
import os
def get_all_gzs(base, years):
"""
for some base URL (unique to your list) and range of years,
download all the archives.
"""
archive_folder = 'list_archives'
if not os.path.exists(archive_folder):
os.makedirs(archive_folder)
for year in years:
print year
for month in calendar.month_name:
filename = str(year) + "-" + month + ".txt.gz"
print filename
url = base + filename
print url
try:
req = urllib2.urlopen(url)
output = open(filename, 'wb')
output.write(req.read())
output.close()
with gzip.open(filename, 'rb') as z:
file_content = z.read()
textfile = archive_folder + "/" + \
str(year) + "-" + month + ".txt"
f = open(textfile, 'w')
f.write(file_content)
f.close
except Exception as e:
print e
# Point URLBASE at your list's pipermail directory, then run: this
# fetches every monthly archive for the years below at import time.
URLBASE = "http://lists.example.net/pipermail/listname/"
YEARS = range(2008, 2016)
get_all_gzs(URLBASE, YEARS)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment