Skip to content

Instantly share code, notes, and snippets.

View honnibal's full-sized avatar

Matthew Honnibal honnibal

View GitHub Profile
import sys
def shift(words, stack, c):
    """Append ``c`` to the pending ``stack`` and leave ``words`` untouched.

    Args:
        words: Items collected so far; returned unchanged.
        stack: The pending item; must support ``stack + c``.
        c: The piece to append to ``stack``.

    Returns:
        The pair ``(words, stack + c)``.
    """
    return words, stack + c
def reduce(words, stack, c):
    """Append the pending ``stack`` to ``words`` and return ``c`` beside it.

    NOTE(review): this name shadows the Python 2 builtin ``reduce``; it is
    kept unchanged so existing callers continue to work.

    Args:
        words: Items collected so far; must support concatenation with a
            one-element tuple (i.e. be a tuple).
        stack: The pending item, added to ``words`` as a single new element.
        c: Returned as the second element of the result -- presumably it
            becomes the new pending stack, mirroring ``shift``'s return
            shape (confirm against the caller).

    Returns:
        The pair ``(words + (stack,), c)``.
    """
    return words + (stack,), c
@honnibal
honnibal / gist:30499850449a46c167a8
Created July 16, 2015 17:01
Syntax-specific search with spaCy
"""
Example use of the spaCy NLP tools for data exploration.
Here we will look for reddit comments that describe Google doing something,
i.e. discuss the company's actions. This is difficult, because other senses of
"Google" now dominate usage of the word in conversation, particularly references to
using Google products.
The heuristics here are quick and dirty --- about 5 minutes work. A better approach
is to use the word vector of the verb. But, the demo here is just to show what's
@honnibal
honnibal / simple_bigrams.py
Created September 14, 2015 06:35
Simple but not so accurate bigram language model
from preshed.counter import PreshCounter
from spacy.en import English
from spacy.attrs import ORTH, IS_OOV
import plac
import plac
from os import path
import os
@honnibal
honnibal / sort_like_color.py
Last active September 27, 2015 09:48
Find words that might be colors, using word vectors.
from __future__ import unicode_literals
from __future__ import print_function
import plac
import spacy.en
def main(vectors_loc=None):
nlp = spacy.en.English()
@honnibal
honnibal / mc.pyx
Last active January 28, 2016 22:54
Monte carlo simulation, re /r/python thread on numba, cython, etc
# cython: infer_types=True
# cython: boundscheck=False
# cython: cdivision=True
# distutils: compile_options = ["-O2", "-fopenmp", "-march=native"]
# distutils: link_options = ["-fopenmp"]
cimport cython
from numpy import random as rng
import numpy as np
import numpy.random
# Simple sentiment analysis with lots and lots of problems. For answer to Quora thread:
# https://www.quora.com/Would-it-be-possible-for-an-undergraduate-like-me-to-create-a-sentiment-analysis-program
import sys
# The class is spelled ``Counter``; ``collections`` has no lowercase
# ``counter`` attribute, so the original import raised ImportError.
from collections import Counter

# Read the file named by the first command-line argument into positive_text.
with open(sys.argv[1]) as file_:
    positive_text = file_.read()
# Read the file named by the second command-line argument into negative_text.
with open(sys.argv[2]) as file_:
    negative_text = file_.read()
http://s000.tinyupload.com/index.php?file_id=07575878755298799648
#!/usr/bin/env bash
# Download and unpack the Python 2.7.5 sources in /tmp, then configure the
# build with a prefix of .python inside the directory the script started in.
HERE=$(pwd)
# Stop if /tmp is unavailable rather than downloading into the wrong place.
cd /tmp || exit 1
# wget treats every positional argument as a URL, so the stray "/tmp" was
# being fetched as a second (invalid) URL. The download already lands in the
# current directory, which is /tmp.
wget http://www.python.org/ftp/python/2.7.5/Python-2.7.5.tgz
tar -zxvf Python-2.7.5.tgz
cd Python-2.7.5 || exit 1
# -p keeps re-runs from failing when the directory exists; the quotes
# protect against spaces in the starting directory's path.
mkdir -p "$HERE/.python"
./configure --prefix="$HERE/.python"
@honnibal
honnibal / gist:1231964e784ab9acb65d
Last active November 18, 2016 06:53
Example of parsing Reddit comment dumps with spaCy: http://spacy.io/
from __future__ import unicode_literals
from __future__ import print_function
import sys
import plac
import bz2
import ujson
import spacy.en
def main(input_loc):
@honnibal
honnibal / dynamic_params.py
Last active June 29, 2017 09:30
Cycle hyper parameter
def cycle_hyper_param(low, high):
'''Dynamically oscillate a hyper-parameter between two values.
Uses the loss momentum to adjust the rate of change. The idea is
that the value should move through regions where the loss is flat
faster, and linger in values where the loss improves.
'''
inc = 0.0001
trend = 0.
prev = 0.