Skip to content

Instantly share code, notes, and snippets.

View micaleel's full-sized avatar
:octocat:
I may be slow to respond.

Khalil micaleel

:octocat:
I may be slow to respond.
View GitHub Profile
@j4mie
j4mie / normalise.py
Created August 30, 2010 12:44
Normalise (normalize) unicode data in Python to remove umlauts, accents etc.
# -*- coding: utf-8 -*-
import unicodedata
""" Normalise (normalize) unicode data in Python to remove umlauts, accents etc. """
data = u'naïve café'
normal = unicodedata.normalize('NFKD', data).encode('ASCII', 'ignore')
print normal
@didip
didip / request_handler_test.py
Created March 12, 2011 21:46
Testing Tornado RequestHandlers
import unittest, os, os.path, sys, urllib
import tornado.database
import tornado.options
from tornado.options import options
from tornado.testing import AsyncHTTPTestCase
# add application root to sys.path
APP_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(APP_ROOT, '..'))
@zanshin
zanshin / .zshrc
Created August 12, 2011 19:09
My .zshrc file
# Path to your oh-my-zsh configuration.
export ZSH=$HOME/.oh-my-zsh
# Set name of the theme to load.
# Look in ~/.oh-my-zsh/themes/
# Optionally, if you set this to "random", it'll load a random theme each
# time that oh-my-zsh is loaded.
#export ZSH_THEME="robbyrussell"
export ZSH_THEME="zanshin"
@xim
xim / cluster_example.py
Created October 11, 2011 20:19
Clustering K-Means by euclidian distance, yay!
import sys
import numpy
from nltk.cluster import KMeansClusterer, GAAClusterer, euclidean_distance
import nltk.corpus
from nltk import decorators
import nltk.stem
stemmer_func = nltk.stem.EnglishStemmer().stem
stopwords = set(nltk.corpus.stopwords.words('english'))
@marcelcaraciolo
marcelcaraciolo / mapfeature.py
Created November 15, 2011 00:36
Map Feature
def map_feature(x1, x2):
'''
Maps the two input features to quadratic features.
Returns a new feature array with more features, comprising of
X1, X2, X1 ** 2, X2 ** 2, X1*X2, X1*X2 ** 2, etc...
Inputs X1, X2 must be the same size
'''
x1.shape = (x1.size, 1)
@marcelcaraciolo
marcelcaraciolo / logistic_reg.py
Created November 15, 2011 01:39
logistic_reg.py
from numpy import loadtxt, where, zeros, e, array, log, ones, append, linspace
from pylab import scatter, show, legend, xlabel, ylabel, contour, title
from scipy.optimize import fmin_bfgs
def sigmoid(X):
'''Compute the sigmoid function '''
#d = zeros(shape=(X.shape))
den = 1.0 + e ** (-1.0 * X)
@thoas
thoas / puller.py
Created January 10, 2012 15:04
Migration script from MySQL to PostgreSQL via SQLAlchemy
#!/usr/bin/env python
import getopt
import sys
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.dialects.mysql.base import TINYINT
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.exc import ProgrammingError
auth = OAuthHandler(CLIENT_ID, CLIENT_SECRET, CALLBACK)
auth.set_access_token(ACCESS_TOKEN)
api = API(auth)
venue = api.venues(id='4bd47eeb5631c9b69672a230')
stopwords = nltk.corpus.stopwords.words('portuguese')
tokenizer = RegexpTokenizer("[\w’]+", flags=re.UNICODE)
@marcelcaraciolo
marcelcaraciolo / tf_idf_final.py
Created January 13, 2012 03:38
tf-idf example
#-*- coding: utf-8 -*-
import re
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk import bigrams, trigrams
import math
stopwords = nltk.corpus.stopwords.words('portuguese')
@anhpt379
anhpt379 / tf_idf_final.py
Created January 13, 2012 07:51 — forked from marcelcaraciolo/tf_idf_final.py
tf-idf example
#-*- coding: utf-8 -*-
import re
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk import bigrams, trigrams
import math
stopwords = nltk.corpus.stopwords.words('portuguese')