Skip to content

Instantly share code, notes, and snippets.

@tcabrol
tcabrol / open_data.py
Created February 6, 2012 23:05
French Open Data :: getting Tax data
#!/usr/bin/env python
# encoding: utf-8
"""
open_data.py
Created by Thomas Cabrol on 2012-01-27.
"""
import re
import os
@tcabrol
tcabrol / tax_data.py
Created February 13, 2012 21:23
Crawling data.gouv.fr for Tax Data
#!/usr/bin/env python
# encoding: utf-8
"""
tax_data.py
Created by Thomas Cabrol on 2012-02-12.
"""
import re
@tcabrol
tcabrol / get_tax_data.py
Created February 15, 2012 17:11
data.gouv.fr :: crawl and download files
#!/usr/bin/env python
# encoding: utf-8
"""
get_tax_data.py
Created by Thomas Cabrol on 2012-02-15.
"""
import os
@tcabrol
tcabrol / extract_tax_data.py
Created February 15, 2012 17:21
data.gouv.fr :: extract data from Excel files
#!/usr/bin/env python
# encoding: utf-8
"""
extract_tax_data.py
Created by Thomas Cabrol on 2012-02-15.
"""
import codecs
import os
@tcabrol
tcabrol / yahoo_geocoder.py
Created March 5, 2012 22:24
Yahoo Geocoder
#!/usr/bin/env python
# encoding: utf-8
"""
geocoder.py
Created by Thomas Cabrol on 2012-02-08.
"""
import codecs
import simplejson
@tcabrol
tcabrol / r_ggplot2.R
Created March 21, 2012 18:24
R and ggplot2 on the iris dataset
require("ggplot2")
# Loading Iris dataset
columns <- c("sepal_length", "sepal_width", "petal_length", "petal_width", "class")
iris <- read.table("/Users/thomas/Documents/data/datasets/iris/iris_learn.csv", sep=',', col.names=columns)
# Simple bar graphs wrapped inside a function
# Showing the mean value of a given variable
graph.mean <- function (variable) {
@tcabrol
tcabrol / movie_recommender.sql
Created April 6, 2012 11:41
Movie Recommender :: SQL
USE movie_lens ;
-- Create the table structure ;
DROP TABLE IF EXISTS movies_ratings ;
CREATE TABLE movies_ratings (
user_id INT ,
movie_id INT ,
rating INT ,
timestamp INT ,
@tcabrol
tcabrol / movie_recommender.sas
Created April 6, 2012 12:00
Movie Recommender :: SAS
** Loading ratings dataset ;
filename source "Z:\data\datasets\movielens\ml-100k\u.data" ;
data Movies_Ratings ;
attrib
user_id informat=best8.
movie_id informat=best8.
rating informat=best8. ;
infile
source dlm='09'x dsd missover ;
@tcabrol
tcabrol / movie_recommender.pig
Created April 6, 2012 12:07
Movie Recommender :: Pig
-- Load the base data: the tab-separated ratings file, reading three
-- integer fields per row as (user_id, movie_id, rating).
movies_ratings = LOAD '/Users/thomas/Documents/data/datasets/movielens/ml-100k/u.data' USING PigStorage('\t') AS (user_id:int, movie_id:int, rating:int) ;
-- Start by restricting the dataset to movies that have at least 30 ratings.
-- Step 1: gather all rating rows of each movie into one group.
B = GROUP movies_ratings BY movie_id ;
-- Step 2: count the rows per movie; $1 is the second field of B, i.e. the
-- bag of grouped movies_ratings tuples.
C = FOREACH B GENERATE group AS movie_id, COUNT($1) AS count ;
-- Step 3: keep only the movies that reach the 30-rating threshold.
D = FILTER C BY count >= 30 ;
-- Step 4: keep just the movie id, renamed to movie_ok so it does not clash
-- with movies_ratings.movie_id in the join below.
E = FOREACH D GENERATE movie_id AS movie_ok ;
-- Step 5: join the ratings against the retained movie ids, which drops
-- every rating whose movie is not in E.
F = JOIN movies_ratings BY movie_id, E BY movie_ok ;
-- Step 6: project back to the original three columns (drops movie_ok).
filtered = FOREACH F GENERATE user_id, movie_id, rating ;
@tcabrol
tcabrol / movie_recommender.py
Created April 8, 2012 00:15
Movie Recommender :: Python
#!/usr/bin/env python
# encoding: utf-8
"""
movie_recommender.py
Created by Thomas Cabrol on 2012-04-06.
Copyright (c) 2012 __MyCompanyName__. All rights reserved.
"""
import csv