Skip to content

Instantly share code, notes, and snippets.

@thinrhino
thinrhino / zipfs.py
Created May 5, 2014 13:07
Plot Zipf's law
from collections import defaultdict
import matplotlib.pyplot as plt
data = open('<data_file>', 'r')
r_data = []
# reading relevant data
while True:
l = data.readline()
if l == '':
# Script to populate data into MongoDB
import twitter
import time
import logging
from pymongo import MongoClient
CONSUMER_KEY = '<twitter_consumer_key>'
CONSUMER_SECRET = '<twitter_secret_key>'
@thinrhino
thinrhino / raw_data_mixpanel.py
Created May 2, 2014 10:18
A piece of code to retrieve raw data from Mixpanel and dump it into a bucket on AWS S3
"""
Code to download and upload raw data from mix-panel
"""
import hashlib
import datetime
import time
import tempfile
import os
import bz2
@thinrhino
thinrhino / transactions_subset.py
Created April 20, 2014 19:19
Kaggle: Acquire Valued Shoppers Challenge: reducing the dataset from 22GB to 1GB
import pandas
df = pandas.read_csv('offers.csv.gz', compression='gzip')
categories = df.category.tolist()
subset = open('subset.csv', 'w')
fl = open('transactions.csv', 'r')
fl.readline()
while True:
l = fl.readline()
if l == '':