Skip to content

Instantly share code, notes, and snippets.

import os
import logging
logger = logging.getLogger(__name__)
ch = logging.StreamHandler()
# A simple script to backup an organization's GitHub repositories.
GHBU_BACKUP_DIR=${GHBU_BACKUP_DIR-"github-backups"} # where to place the backup files
GHBU_ORG=${GHBU_ORG-"IATI"} # the GitHub organization whose repos will be backed up
GHBU_UNAME=${GHBU_UNAME-"bjwebb"} # the username of a GitHub account (to use with the GitHub API)
#GHBU_PASSWD=${GHBU_PASSWD-""} # the password for that account
GHBU_GITHOST=${GHBU_GITHOST-""} # the GitHub hostname (see notes)
GHBU_PRUNE_OLD=${GHBU_PRUNE_OLD-true} # when `true`, old backups will be deleted
GHBU_PRUNE_AFTER_N_DAYS=${GHBU_PRUNE_AFTER_N_DAYS-3} # the min age (in days) of backup files to delete
import pkg_resources
def get_pkg_licenses(pkg):
lines = pkg.get_metadata_lines('METADATA')
except (KeyError, IOError):
lines = pkg.get_metadata_lines('PKG-INFO')
license = None
license_classifiers = []
View 1_spreadsheet.csv
@last-updated-datetime @xml:lang @default-currency @humanitarian @hierarchy @linked-data-uri iati-identifier reporting-org/@ref reporting-org/@type reporting-org/@secondary-reporter reporting-org/narrative/0 reporting-org/narrative/0/@xml:lang title/narrative/0 title/narrative/0/@xml:lang description/0/@type description/0/narrative/0 description/0/narrative/0/@xml:lang participating-org/0/@ref participating-org/0/@type participating-org/0/@role participating-org/0/@activity-id participating-org/0/narrative/0 participating-org/0/narrative/0/@xml:lang other-identifier/0/@ref other-identifier/0/@type other-identifier/0/owner-org/@ref other-identifier/0/owner-org/narrative/0 other-identifier/0/owner-org/narrative/0/@xml:lang activity-status/@code activity-date/0/@type activity-date/0/@iso-date activity-date/0/narrative/0 activity-date/0/narrative/0/@xml:lang contact-info/0/@type contact-info/0/organisation/narrative/0 contact-info/0/organisation/narrative/0/@xml:lang contact-info/0/department/narrative/0 contact-
View 0_xquery.xq
for $org in /iati-organisations/iati-organisation
let $code := if ($org/organisation-identifier)
then $org/organisation-identifier/text()
else $org/iati-identifier/text()
return if ($code = $org/reporting-org/@ref/data()) then
{if ($org/name/narrative)
then $org/name else
View 0_xquery.xq
declare namespace output = "";
declare option output:method "csv";
declare option output:csv "header=yes, separator=comma";
for $org in /iati-organisations/iati-organisation
return <record>
if ($org/organisation-identifier)
then $org/organisation-identifier/text()
else $org/iati-identifier/text()
import json_merge_patch
import flattentool
import traceback
import requests
import json
import copy
import os
release_schema = requests.get('').json()
import csv
import json
releases = []
csv.field_size_limit(500 * 1024 * 1024)
with open('uk_contracts_finder2_ocds.csv') as fp:
reader = csv.DictReader(fp)
for row in reader:
Bjwebb /
Last active Sep 18, 2016
PyCon UK 2016 lightning talk on OCDS


I'm going to start with a large number, 9.5 trillion dollars, which as a pile of dollar notes would stretch to the moon and back.

This is the total global spending through government contracting, which is 15% of global GDP.

If you want an accountable government, it's important to have public data about all this government contracting. Sadly this is often lacking at the moment -

Bjwebb / mapping.json
Last active Jan 21, 2016
2.02 CSV2IATI mapping template
View mapping.json
"organisation": {
"reporting-org": {
"text": "",
"ref": "",
"type": ""
"default-currency": "",
"lang": "",
"contact-info": {