Skip to content

Instantly share code, notes, and snippets.

import urlparse
import collections
# Group every non-blank line of urls.txt by its network location (domain).
# `data` maps domain -> set of the URLs seen for that domain.
data = collections.defaultdict(set)
# The generator reads lazily, so it must be consumed while the file is open;
# the `with` block guarantees the handle is closed afterwards.
with open('urls.txt') as url_file:
    urls = (line.strip() for line in url_file if line.strip())
    for url in urls:
        domain = urlparse.urlparse(url).netloc
        data[domain].add(url)
@matthewrobertbell
matthewrobertbell / gist:5382797
Created April 14, 2013 13:48
lists gonna list
import urlparse
import collections
# Group every non-blank line of urls.txt by its network location (domain).
# `data` maps domain -> set of the URLs seen for that domain.
data = collections.defaultdict(set)
# The generator reads lazily, so it must be consumed while the file is open;
# the `with` block guarantees the handle is closed afterwards.
with open('urls.txt') as url_file:
    urls = (line.strip() for line in url_file if line.strip())
    for url in urls:
        domain = urlparse.urlparse(url).netloc
        data[domain].add(url)
@matthewrobertbell
matthewrobertbell / gist:5382717
Last active December 16, 2015 05:09
Do you even code bro?
import urlparse
import collections
def tree():
    """Return an auto-vivifying nested mapping.

    The result is a ``defaultdict`` whose default factory is ``tree`` itself,
    so looking up a missing key at any depth creates another such mapping.
    """
    return collections.defaultdict(tree)
urls = (l.strip() for l in open('urls.txt') if len(l.strip()))
data = tree()
for url in urls:
@matthewrobertbell
matthewrobertbell / gist:5168688
Last active December 14, 2015 23:49
Nested classes, with autorouting, see https://github.com/mattseh/flask-classy
from flask import Flask, url_for
from flask.ext.classy import FlaskView
# Quotes used by the app; each entry is a single "text ~ author" string.
quotes = [
    "A noble spirit embiggens the smallest man! ~ Jebediah Springfield",
    "If there is a way to do it better... find it. ~ Thomas Edison",
    "No one knows what he can do till he tries. ~ Publilius Syrus",
]
from flask import Flask
from flask.ext.classy import FlaskView
# Quotes used by the app; each entry is a single "text ~ author" string.
quotes = [
    "A noble spirit embiggens the smallest man! ~ Jebediah Springfield",
    "If there is a way to do it better... find it. ~ Thomas Edison",
    "No one knows what he can do till he tries. ~ Publilius Syrus",
]
# Allocate the next job id for today.
# job_counter.txt holds one "<date>,<job_id>" record: the id increments on
# every run and restarts at 1 when the stored date is not today's date.
# Read the date once so the comparison and the written record always agree.
today = str(datetime.date.today())
try:
    with open('job_counter.txt') as counter_file:
        date, job_id = counter_file.read().split(',')
    job_id = int(job_id) + 1
    if date != today:
        job_id = 1
except (IOError, OSError, ValueError):
    # Missing/unreadable file or a malformed record: start counting afresh.
    job_id = 1
with open('job_counter.txt', 'w') as counter_file:
    counter_file.write('{date},{job_id}'.format(date=today, job_id=job_id))
@matthewrobertbell
matthewrobertbell / redditmonitor.py
Created December 24, 2011 12:47
Reddit Frontpage Monitor
import web
import time
seen = set()
while True:
page = web.grab('http://www.reddit.com')
if page:
threads = set(page.xpath('//p[@class="title"]/a/@href||//p[@class="title"]/a/text()'))
new_threads = threads - seen
from functools import partial
import time
def coroutine(func):
    """Decorator that primes a generator-based coroutine.

    Calling the decorated function creates the generator and advances it
    once, so the caller can use ``.send()`` on the returned object
    immediately.

    Args:
        func: a generator function.

    Returns:
        A wrapper that forwards its arguments to ``func`` and returns the
        already-advanced generator.
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped function's __name__ / __doc__
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        # The next() builtin works on Python 2.6+ and 3; cr.next() is 2-only.
        next(cr)
        return cr
    return start
@matthewrobertbell
matthewrobertbell / randomlines.py
Created December 11, 2011 20:12
Python Random Lines
import os.path
import random
class RandomLines(object):
def __init__(self, input_file, cache_index=True):
if isinstance(input_file, basestring):
self.source_file = open(input_file,'rb')
filename = input_file
else:
self.source_file = input_file
@matthewrobertbell
matthewrobertbell / Simple DHT
Created September 28, 2011 02:24
An attempt at a simple, minimalist DHT
import hashlib
class DHT(object):
def __init__(self):
self.hashes = set()
def nodes(self,hash):
hash_int = long(hash,16)
closeness = {}
for v in self.hashes: