
@rossjones
rossjones / gist:4761582
Created February 12, 2013 11:00
Content negotiation in CKAN
wget --header="Accept:application/rdf+xml" http://demo.ckan.org/dataset/gold-prices
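The same request can be made from Python; a minimal stdlib sketch, using the URL and Accept header from the wget line above (the fetch itself is left commented out so this runs offline):

```python
import urllib.request

# Ask CKAN for an RDF/XML representation of the dataset page
# by setting the Accept header, exactly as the wget example does.
url = "http://demo.ckan.org/dataset/gold-prices"
req = urllib.request.Request(url, headers={"Accept": "application/rdf+xml"})

print(req.get_header("Accept"))  # application/rdf+xml
# response = urllib.request.urlopen(req)  # perform the fetch when online
```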
rossjones / datahub-despam.py
Last active December 15, 2015 02:29
Simple (but slow) script to remove groups from datahub.io
import re
import sys
import ckanclient
import dateutil.parser
import datetime
em = re.compile('.*@(.*)')
spammy = ["yahoo.com", "hotmail.com", 'mindpowerup.com',
          "yahoo.co.uk", 'acumenwit.com', 'hotmail.fr',
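The `em` regex above captures the domain part of an email address; a small sketch of how it could drive the spam check (the user emails here are made up for illustration, and only a slice of the domain list is shown):

```python
import re

em = re.compile('.*@(.*)')
spammy = ["yahoo.com", "hotmail.com", "mindpowerup.com"]

def is_spammy(email):
    # Pull out the domain with the regex and check it against the blocklist.
    m = em.match(email)
    return bool(m) and m.group(1) in spammy

users = ["alice@example.org", "bob@mindpowerup.com"]
flagged = [u for u in users if is_spammy(u)]
print(flagged)  # ['bob@mindpowerup.com']
```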
rossjones / core.clj
Created April 2, 2013 20:10
Start of hangman
(ns hangman.core
  (:gen-class)
  (:require [clojure.string :as str]))

(defn char_to_draw
  "Chooses whether to draw a _ or a char"
  [ch guesses]
  (if (is_char_in_words ch guesses)
    (str ch " ")
    (str "_ ")
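The truncated `char_to_draw` decides, letter by letter, whether to show the character or a blank. The same idea as a rough Python sketch (the `draw_word` helper and the guessed-letter set are illustrative, not from the gist):

```python
def char_to_draw(ch, guesses):
    # Draw the letter if it has been guessed, otherwise a blank.
    return ch + " " if ch in guesses else "_ "

def draw_word(word, guesses):
    # Render the whole word the way hangman displays it.
    return "".join(char_to_draw(ch, guesses) for ch in word)

print(draw_word("hangman", {"a", "n"}))  # _ a n _ _ a n
```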
rossjones / lovematch.py
Created April 16, 2013 20:01
Our first simple Python program for PyPool
#!/usr/bin/env python
# coding: utf-8
# lovematch asks for two names and determines their compatibility
# IPO imminent
import sys
VOWELS = "aeiou"
def get_score_for(person):
    common = ['e', 't', 'i', 's', 'o', 'n', 'h', 'r', 'a', 'f', 'u', 'l', 'd',
              'g', 'm', 'w', 'p', 'y', 'c', 'b', 'v', 'k', 'x', 'j', 'q', 'z']
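The preview cuts off before the scoring logic. One plausible sketch, scoring each name by the frequency rank of its letters in the `common` list, is below; this is a guess at the approach, not the gist's actual algorithm:

```python
# Letters ordered roughly from most to least common (from the gist).
common = ['e', 't', 'i', 's', 'o', 'n', 'h', 'r', 'a', 'f', 'u', 'l', 'd',
          'g', 'm', 'w', 'p', 'y', 'c', 'b', 'v', 'k', 'x', 'j', 'q', 'z']

def get_score_for(person):
    # Sum a rank per letter: commoner letters contribute higher scores.
    # This scoring rule is an assumption, not taken from the gist.
    name = person.lower()
    return sum(len(common) - common.index(ch) for ch in name if ch in common)

print(get_score_for("Ada"))  # 50 with this ranking
```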
rossjones / import-from-classic.py
Created July 3, 2013 19:17
Simple tool to pull your code and data from ScraperWiki classic and overwrite the current tool
#!/usr/bin/env python
import urllib
import requests
from lxml.html import fromstring
SCRAPER_NAME = "smr"
code = 'http://classic.scraperwiki.com/editor/raw/{0}'.format(SCRAPER_NAME)
db = 'https://classic.scraperwiki.com/scrapers/export_sqlite/{0}/'.format(SCRAPER_NAME)
lang_page = fromstring(requests.get('http://classic.scraperwiki.com/scrapers/{0}'.format(SCRAPER_NAME)).content)
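All three URLs above derive from the scraper name; a small sketch factoring that out (the `build_urls` helper is illustrative, not part of the gist):

```python
def build_urls(name):
    # URLs for a ScraperWiki Classic scraper's raw code,
    # sqlite export, and info page, keyed by purpose.
    return {
        "code": "http://classic.scraperwiki.com/editor/raw/{0}".format(name),
        "db": "https://classic.scraperwiki.com/scrapers/export_sqlite/{0}/".format(name),
        "page": "http://classic.scraperwiki.com/scrapers/{0}".format(name),
    }

urls = build_urls("smr")
print(urls["code"])  # http://classic.scraperwiki.com/editor/raw/smr
```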
rossjones / xls_template.php
Last active December 19, 2015 16:28
Creates an XLS file containing the column titles we want, and adds a drop-down list to the first 500 cells in the Owner column. It doesn't enforce validation of the date in the last column; we'll use dateutil for that.
<?php
error_reporting(E_ALL);
date_default_timezone_set('Europe/London');
$publisher = $_GET["publisher_name"];
// TODO: Validate publisher name
/* For the given publisher, return the entire list of sub-publishers
as a flat array of names (including the one provided). */
function get_subpublishers_for($name) {
rossjones / brute_force_ods.py
Created July 18, 2013 14:09
Fetch the rows from the first sheet in an ODS file by loading the whole tree into memory. Ideally this should be SAX-parsed, but that turns out not to be much better with our test case (a 10 MB content.xml).
#!/usr/bin/env python
from lxml import etree
TABLE_NS = u"urn:oasis:names:tc:opendocument:xmlns:table:1.0"
TEXT_NS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
def get_rows_from_file(doc):
    nodes = doc.xpath("//t:table[1]", namespaces={"t": TABLE_NS})
    if nodes:
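The preview stops before the row extraction. A self-contained sketch of the same idea using only the standard library (`xml.etree.ElementTree` in place of lxml, and a tiny inline stand-in for the content.xml found inside an ODS zip, so it runs without a real file):

```python
import xml.etree.ElementTree as ET

TABLE_NS = "urn:oasis:names:tc:opendocument:xmlns:table:1.0"
TEXT_NS = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
NS = {"t": TABLE_NS, "x": TEXT_NS}

# Minimal stand-in for an ODS content.xml: one table, one row, two cells.
CONTENT = """
<doc xmlns:table="{0}" xmlns:text="{1}">
  <table:table>
    <table:table-row>
      <table:table-cell><text:p>name</text:p></table:table-cell>
      <table:table-cell><text:p>price</text:p></table:table-cell>
    </table:table-row>
  </table:table>
</doc>
""".format(TABLE_NS, TEXT_NS)

def get_rows(doc):
    # Take the first table and pull the text out of each cell, row by row.
    table = doc.find(".//t:table", NS)
    rows = []
    for row in table.findall("t:table-row", NS):
        rows.append([p.text for p in row.findall("t:table-cell/x:p", NS)])
    return rows

doc = ET.fromstring(CONTENT)
print(get_rows(doc))  # [['name', 'price']]
```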
rossjones / slightly_less_brute_force.py
Created July 18, 2013 14:12
Lots of code, but low memory usage (ish, in comparison to the 4 GB odfpy was using), and about 9 seconds for the test file. Still ends up loading a lot into memory.
#!/usr/bin/env python
from lxml import etree
import mmap
def ram_used(where):
    import resource
    print "func:{0}:{1}".format(where, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024)
def _fast_iter(context, func):
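`_fast_iter` is cut off above; the pattern that name usually refers to is iterparse-and-clear, which handles each element as its end tag arrives and then frees it, so the tree never accumulates. A minimal stdlib sketch of that pattern (Python 3 here, unlike the 2013 gist, and the sample XML is made up):

```python
import io
import xml.etree.ElementTree as ET

XML = "<rows>" + "".join("<row>{0}</row>".format(i) for i in range(5)) + "</rows>"

def fast_iter(source, tag, func):
    # Stream the document, call func on each matching element,
    # then clear the element so memory use stays flat.
    results = []
    for event, elem in ET.iterparse(source, events=("end",)):
        if elem.tag == tag:
            results.append(func(elem))
            elem.clear()
    return results

totals = fast_iter(io.StringIO(XML), "row", lambda e: int(e.text))
print(totals)  # [0, 1, 2, 3, 4]
```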
#!/usr/bin/env python
"""
1. Connect to sqlite.
2. Write out data column into name.json
3. For each name, create a folder with that name, write the code into that folder.
4. Download the data file
5. Move the name.json into the folder (it's done)
"""