Additions wanted - please just fork and add.
- Parsing PDFs by Thomas Levine
- [Get Started With Scraping – Extracting Simple Tables from PDF Documents][scoda-simple-tables]
Additions wanted - please just fork and add.
2010-11-P01.csv:4:Vendor,Expense Description,Amount,Doc No,,,^M | |
2010-11-P02.csv:6:Vendor,Expense Description,Amount,Doc No,,,^M | |
2010-11-P03.csv:6:Document No","Amount | |
2010-11-P04-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
2010-11-P05-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
2010-11-P06-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
2010-11-P07-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
2010-11-P08-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
2010-11-P09-500.csv:1:Vendor ID,Vendor Name,Cost Element,Expenditure Account Code Description,SAP Document No,Amount £,Clearing Date^M | |
2010-11-P10-500.csv:1:Vendor ID,Vendor Name,Cos |
var nodeUtil = require("util"), | |
PFParser = require("pdf2json") | |
; | |
var pdfParser = new PFParser(); | |
pdfParser.on("pdfParser_dataReady", function(data) { | |
console.log('here'); | |
console.log(data); | |
console.log(data.data.Pages[0]); |
// Geocoding using Mapquest Nominatim API | |
// | |
// Documentation for the API: http://wiki.openstreetmap.org/wiki/Nominatim | |
// Here's an example query: http://open.mapquestapi.com/nominatim/v1/search?q=detroit&format=json | |
// geocode function | |
// | |
// :param place: is a place name like "Detroit" or "London" | |
// :param callback: function receiving arguments (error, {lon: ..., lat: ...}) | |
function geocode(place, callback) { |
// Convert a title to a URL slug.
//
// Steps, in order:
//   1. lowercase the title
//   2. replace each ' ' by '-'
//   3. remove everything that is not alphanumeric, underscore or dash
//   4. collapse runs of dashes into a single dash
//
// Collapsing must happen last: removing characters in step 3 can leave
// dashes next to each other (e.g. "a & b" -> "a-&-b" -> "a--b"), which an
// earlier collapse would miss.
var slug = title
  .toLowerCase()
  .replace(/ /g, '-')
  .replace(/[^\w-]+/g, '')
  .replace(/--+/g, '-');
import urlparse | |
import json | |
import requests | |
# set your api key for this work | |
apikey = 'XXXXX' | |
datapusher_url = 'http://datapusher-test.herokuapp.com' | |
ckan_url = 'http://datahub.io' | |
# gold prices | |
res_id = 'b9aae52b-b082-4159-b46f-7bb9c158d013' |
README is empty
README is empty
'''Run this script and it will export a list of all CKAN extensions on github | |
(guessed by repo name containing ckanext) to json and csv files in this directory
''' | |
import urllib | |
import json | |
import csv | |
jsonfp = 'extensions-gh.json' | |
csvfp = 'extensions-gh.csv' |