Gavin Hackeling gavinmh

## gist:1775709
asdasdasdasdas

## viterbi.py
# -*- coding: utf-8 -*-
"""
This is an example of a basic optical character recognition system.
Some components, such as the featurizer, are missing, and have been replaced
with data that I made up.

This system recognizes words produced from an alphabet of 2 letters: 'l' and 'o'.
Words that can be recognized include 'lol', 'lolol', and 'loooooll'.
We'll assume that this system is used to digitize hand-written notes by Redditors,
or something.

## featurizer_sub.py
from __future__ import division
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic
from nltk.metrics import edit_distance
from nltk.corpus.reader.wordnet import WordNetError
import numpy as np
import logging, os
import Alignment_sub


## Alignment_sub.py
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 23 11:25:40 2012

@author: gavin
"""
import logging
from nltk.corpus import wordnet as wn

class Alignment_sub:

## classifier_substition.py
try:
    import cPickle as pickle
except:
    import pickle

from sklearn.ensemble import RandomForestClassifier
import logging, os


class Lexent_classifier_sub:

## harness_substitution.py
import logging
import numpy as np
import Alignment_sub
import lexent_featurizer_sub

try:
    import cPickle as pickle
except:
    import pickle


## 52-displaylink.conf
Section "Device"
Identifier      "intel"
driver          "intel"
EndSection

Section "Device"
Identifier      "dl1"
driver          "displaylink"
Option  "fbdev" "/dev/fb1"
EndSection

## naive_summarizer
# -*- coding: utf-8 -*-

'''
The following is a naive, unsupervised text summarizer.
It extracts N of the text's most salient sentences.
Salience is defined as the average of the tf-idf weights of the words in a sentence.
'''
from nltk import sent_tokenize, word_tokenize
from collections import Counter
from math import log10

## ner.py
# -*- coding: utf-8 -*-
'''

'''
from nltk import sent_tokenize, word_tokenize, pos_tag, ne_chunk


def extract_entities(text):
	entities = []
	for sentence in sent_tokenize(text):

## made-in-nyc-jobs.tsv

          
            10Gen (The MongoDB Company)
            http://www.10gen.com/careers

            
              1stdibs.com
              http://www.1stdibs.com/jobs/

            
              20x200
              http://www.20x200.com/jobs/

            
              29th Street Publishing
              http://29.io

            
              2tor Inc.
              http://2tor.com/careers/

            
              303 Network, Inc.

            
              33Across
              http://33across.com/careers.php#axzz1uqxl0v16

            
              360i
              http://360i.com/careers

            
              3degrees
              http://toprubyjobs.com/jobs/399-3degrees-cto-%252F-lead-rails-engineer

            
              680 Partners LLC 
              http://www.680partners.com
	# -*- coding: utf-8 -*-
	"""
	This is an example of a basic optical character recognition system.
	Some components, such as the featurizer, are missing, and have been replaced
	with data that I made up.

	This system recognizes words produced from an alphabet of 2 letters: 'l' and 'o'.
	Words that can be recognized include 'lol', 'lolol', and 'loooooll'.
	We'll assume that this system is used to digitize hand-written notes by Redditors,
	or something.
	from __future__ import division
	from nltk.corpus import wordnet as wn
	from nltk.corpus import wordnet_ic
	from nltk.metrics import edit_distance
	from nltk.corpus.reader.wordnet import WordNetError
	import numpy as np
	import logging, os
	import Alignment_sub
	# -*- coding: utf-8 -*-
	"""
	Created on Fri Nov 23 11:25:40 2012

	@author: gavin
	"""
	import logging
	from nltk.corpus import wordnet as wn

	class Alignment_sub:
	try:
	import cPickle as pickle
	except:
	import pickle

	from sklearn.ensemble import RandomForestClassifier
	import logging, os


	class Lexent_classifier_sub:
	Section "Device"
	Identifier "intel"
	driver "intel"
	EndSection

	Section "Device"
	Identifier "dl1"
	driver "displaylink"
	Option "fbdev" "/dev/fb1"
	EndSection
	# -*- coding: utf-8 -*-

	'''
	The following is a naive, unsupervised text summarizer.
	It extracts N of the text's most salient sentences.
	Salience is defined as the average of the tf-idf weights of the words in a sentence.
	'''
	from nltk import sent_tokenize, word_tokenize
	from collections import Counter
	from math import log10
	# -*- coding: utf-8 -*-
	'''

	'''
	from nltk import sent_tokenize, word_tokenize, pos_tag, ne_chunk


	def extract_entities(text):
	entities = []
	for sentence in sent_tokenize(text):
	10Gen (The MongoDB Company)	http://www.10gen.com/careers
	1stdibs.com	http://www.1stdibs.com/jobs/
	20x200	http://www.20x200.com/jobs/
	29th Street Publishing	http://29.io
	2tor Inc.	http://2tor.com/careers/
	303 Network, Inc.
	33Across	http://33across.com/careers.php#axzz1uqxl0v16
	360i	http://360i.com/careers
	3degrees	http://toprubyjobs.com/jobs/399-3degrees-cto-%252F-lead-rails-engineer
	680 Partners LLC	http://www.680partners.com