Skip to content

Instantly share code, notes, and snippets.

@andreiolariu
andreiolariu / phrase_extraction.py
Created November 1, 2011 19:36
Extracting the most used phrases in a text document
# for more info check out http://webmining.olariu.org/is-winter-really-coming
import re
from math import log, sqrt
import matplotlib.pyplot as pyplot
DEPTH = 3  # minimum depth for tree construction = minimum phrase length
OCCURRENCES = 10  # minimum number of phrase occurrences

# Read the whole input document into memory and lowercase it. The context
# manager closes the file handle as soon as the contents have been read.
with open('game.txt') as source_file:
    text = source_file.read().lower()
@andreiolariu
andreiolariu / mm_youtube.py
Created November 18, 2011 17:25
Markov model based on youtube comments
# for more info check out http://webmining.olariu.org/interview-with-a-lady-gaga-fan
# made to be run in the ipython console
import urllib, urllib2, time, random
import simplejson as json
def fetch_url(url, get=None, post=None):
user_agent = 'Andrei Olariu\'s Web Mining for Dummies'
headers = {'User-Agent': user_agent}
if get:
@andreiolariu
andreiolariu / hackaton.py
Created November 27, 2011 15:14
uberVU Hackathon - Noun-verb relationships
# more info: http://webmining.olariu.org/ubervu-hackaton-relationship-tagcloud
from nltk import pos_tag, word_tokenize
import en # Nodebox English Linguistics library
import urllib, urllib2, re
import json
from time import time
def fetch_url(url, get=None, post=None):
user_agent = 'Andrei Olariu\'s Web Mining for Dummies'
@andreiolariu
andreiolariu / elclasico.py
Created December 11, 2011 20:15
Histogram and keyword detection for tweets during Real Madrid - Barcelona match
# more info at http://webmining.olariu.org/el-clasico-on-twitter
# this code is designed to be run in ipython
import urllib, urllib2, time, threading, Queue, re
from datetime import datetime
import simplejson as json
import matplotlib.pyplot as plt
import numpy as np
@andreiolariu
andreiolariu / oscars.py
Created February 12, 2012 08:59
uberVU hackathon - Twitter Tagcloud for Oscar Best Movie Nominees
# more info here: http://webmining.olariu.org/the-story-of-the-oscar-predictions
import urllib, urllib2, re
import json
from time import time
# using this POS tagger:
# http://jasonwiener.com/2006/01/20/simple-nlp-part-of-speech-tagger-in-python/
import NLPlib
@andreiolariu
andreiolariu / minify.py
Created November 26, 2013 23:03 — forked from palcu/minify.py
import math

# Reads test cases from the file named 'i' and prints one verdict per case.
#
# Input layout as consumed below: the first line holds the number of cases;
# each case is a line with the number of grid rows, followed by that many
# rows in which '#' marks a filled cell and any other character (including
# the trailing newline) counts as empty.
#
# A case prints YES when every filled cell lies inside the side x side
# window anchored at the first filled cell of the topmost non-empty row,
# where side = int(sqrt(number of filled cells)); otherwise it prints NO.
#
# NOTE: a grid without any '#' exhausts the rows in the trimming loop and
# raises IndexError, exactly as the original one-liner version did.
with open('i') as infile:
    read_line = infile.readline
    for case_no in range(1, int(read_line()) + 1):
        grid = []
        filled = 0
        for _ in range(int(read_line())):
            row = [1 if ch == '#' else 0 for ch in read_line()]
            grid.append(row)
            filled += sum(row)

        # Side length of a square that would hold exactly `filled` cells
        # (rounded down when `filled` is not a perfect square).
        side = int(math.sqrt(filled))

        # Drop leading rows that contain no filled cell. any() is used
        # instead of filter() so the emptiness test does not depend on
        # filter() returning a list.
        while not any(grid[0]):
            grid.pop(0)

        # Column of the first filled cell in the topmost non-empty row.
        left = grid[0].index(1)

        # Count the filled cells inside the candidate square window.
        inside = sum(sum(row[left:left + side]) for row in grid[:side])

        # Parenthesised single-argument form prints the same text whether
        # print is a statement or a function.
        print("Case #%s: %s" % (case_no, 'YES' if inside == filled else 'NO'))