Skip to content

Instantly share code, notes, and snippets.

View bogsio's full-sized avatar

George-Bogdan Ivanov bogsio

View GitHub Profile
python manage.py syncdb
pip install Django South
django-admin.py startproject tptutorial
mkdir tastypie-tutorial
cd tastypie-tutorial/
virtualenv tptut
pip install Django South
source tptut/bin/activate
from django.core.servers.basehttp import FileWrapper
from django.http import HttpResponse
import tempfile
import urllib
# ...
def xml_view(request):
# ...
filename, _str = generate_xml()
def generate_xml():
    """Return a fixed ``(filename, xml_string)`` pair.

    Returns:
        A 2-tuple: the file name ``'items.xml'`` and a small hard-coded
        XML document as a string.
    """
    return 'items.xml', '<items><item>Item 1</item></items>'
# Demo calls. print(...) with a single argument behaves the same on
# Python 2 and Python 3, unlike the bare print statement.
print(convert("death", WN_NOUN, WN_VERB))
print(convert("story", WN_NOUN, WN_VERB))
print(convert("boring", WN_ADJECTIVE, WN_NOUN))
print(convert("trouble", WN_NOUN, WN_ADJECTIVE))
print(convert("solve", WN_VERB, WN_ADJECTIVE_SATELLITE))
print(convert("think", WN_VERB, WN_ADJECTIVE))
# Result lines as pasted in the original (truncated with "..."; presumably
# the output of the first calls above -- kept as comments so the file parses):
# [('die', 0.75), ('end', 0.2), ('decease', 0.05)]
# [('report', 0.2222222222222222), ('tell', 0.2222222222222222), ('narrate', 0.2222222222222222),...
# [('tedium', 0.3333333333333333), ('dullness', 0.16666666666666666),...
from nltk.corpus import wordnet as wn
# Readable names for the single-letter part-of-speech tags used below.
WN_NOUN = "n"                 # noun
WN_VERB = "v"                 # verb
WN_ADJECTIVE = "a"            # adjective
WN_ADJECTIVE_SATELLITE = "s"  # adjective satellite
WN_ADVERB = "r"               # adverb
def convert(word, from_pos, to_pos):
# Lookup demo. print(...) with a single argument works on Python 2 and 3.
# The list after each call is the line that followed it in the original
# (presumably the printed result); kept as a comment.
print(index.lookup('loves'))
# ['Tunnel of Love', 'Whole Lotta Love', "Since I've Been Loving You"]
print(index.lookup('loved'))
# ['Tunnel of Love', 'Whole Lotta Love', "Since I've Been Loving You"]
print(index.lookup('daze'))
# ['Dazed and Confused']
print(index.lookup('confusion'))
# ['Dazed and Confused']
# Dire Straits top-10 titles (only the entries visible in this excerpt),
# added to the index in the original order.
for title in (
    'Industrial Disease',
    'Private Investigations',
    'So Far Away',
    'Twisting by the Pool',
    'Skateaway',
    'Walk of Life',
    'Romeo and Juliet',
    'Tunnel of Love',
    'Money for Nothing',
):
    index.add(title)
import nltk
from collections import defaultdict
from nltk.stem.snowball import EnglishStemmer
class Index:
""" Inverted index datastructure """
def __init__(self, tokenizer, stemmer=None, stopwords=None):
"""
tokenizer -- NLTK compatible tokenizer function