Skip to content

Instantly share code, notes, and snippets.

View mikejs's full-sized avatar

Michael Stephens mikejs

View GitHub Profile
import os
try:
from pypy.rlib.jit import JitDriver, purefunction, hint
except ImportError:
# If pypy isn't importable then (presumably) we're not being translated,
# so make JIT stuff no-ops.
class JitDriver(object):
def __init__(self, **kwargs):
pass
@mikejs
mikejs / gist:827680
Created February 15, 2011 15:50
merge a bunch of shapefiles
#!/bin/bash
mkdir merged
for f in "$@"
do
# Append to the merged shapefile once it exists; otherwise create it from
# the current input. "$f" is quoted so that a path containing spaces or glob
# characters reaches ogr2ogr as a single, unexpanded argument.
if [ -f merged/merged.shp ]
then
    ogr2ogr -f "esri shapefile" -update -append merged/merged.shp "$f" -nln Merged
else
    ogr2ogr -f "esri shapefile" merged/merged.shp "$f"
fi
def pagerank(matrix, d_factor=0.85):
"""
Calculate the pagerank vector of a given adjacency matrix (using
the power method).
:param matrix: an adjacency matrix
:param d_factor: the damping factor
"""
size = len(matrix)
epsilon = 0.0001
from mongrel2.config import *
main = Server(
uuid="cb69cc4f-59d0-4cdb-aefe-4dcbe83f4682",
access_log="/logs/access.log",
error_log="/logs/error.log",
chroot="./",
default_host="mikej.st",
name="main",
pid_file="/run/mongrel2.pid",
# ddg -h
Usage: ddg [options] query
Options:
--version show program's version number and exit
-h, --help show this help message and exit
-o, --open open results in a browser
-n N number of results to show
-d D disambiguation choice
# Check that nltk and strfry produce the same Levenshtein distance on
# a bunch of randomly generated strings.
import strfry
import nltk.metrics
import random
for i in xrange(0, 10000):
a = ''.join([chr(random.randint(1, 255)) for x in xrange(0, random.randint(0, 20))])
b = ''.join([chr(random.randint(1, 255)) for x in xrange(0, random.randint(0, 20))])
# Levenshtein Distance:
# NLTK: 446.74 usec/call
# strfry: 1.86 usec/call
import timeit
nltk_lev = timeit.Timer(stmt="nltk.metrics.edit_distance('this is a string', 'this is another string')",
setup="import nltk.metrics")
str_lev = timeit.Timer(stmt="strfry.levenshtein_distance('this is a string', 'this is another string')",
import unicodedata
from pymongo.son import SON
from pymongo.son_manipulator import SONManipulator
class NormalizeUnicode(SONManipulator):
def __init__(self, form="NFKD"):
self.__form = form
# Load the training and result data sets; the first CSV column supplies the
# row names and the first line supplies the column names.
train <- read.csv("~/code/informs/TrainingData.csv", row.names=1, header=TRUE)
test <- read.csv("~/code/informs/ResultData.csv", row.names=1, header=TRUE)
# Remove the Variable142* and Variable158* OPEN/LOW/HIGH/LAST columns from
# both train and test.
for (col in c("Variable142OPEN", "Variable142LOW", "Variable142HIGH",
"Variable142LAST", "Variable158OPEN", "Variable158LOW",
"Variable158HIGH", "Variable158LAST")) {
train[col] <- NULL
test[col] <- NULL
}
# Replace every missing value (NA) in the training data with 0.
train[is.na(train)] <- 0.0
# Replace all of the lxml cssselect calls in a python script with
# equivalent xpath calls.
import re
import sys
import lxml.cssselect
def cssselect_replace(match):
xpath = lxml.cssselect.css_to_xpath(match.group(2))
xpath = xpath.replace("'", "\\'").replace('"', '\\"')