Skip to content

Instantly share code, notes, and snippets.

@jonathana
jonathana / grouplens_evaluator.py
Created June 22, 2011 20:15
"Mahout in Action" Grouplens evaluator sample from section 2.5 ported to jython
import sys, os, glob
from datetime import datetime
# Make the Mahout classes importable from Jython: MAHOUT_CORE points at the
# core jar/classes directory, MAHOUT_JAR_DIR at a directory of dependency jars.
# Guard against unset env vars — os.environ.get returns None, which would
# otherwise be appended to sys.path / crash the string concatenation below.
mahout_core = os.environ.get("MAHOUT_CORE")
if mahout_core:
    sys.path.append(mahout_core)
jar_dir = os.environ.get("MAHOUT_JAR_DIR")
if jar_dir:
    for jar in glob.glob(jar_dir + "/*.jar"):
        sys.path.append(jar)
from org.apache.mahout.common import RandomUtils
from org.apache.mahout.cf.taste.common import TasteException
from org.apache.mahout.cf.taste.eval import *
@bnyeggen
bnyeggen / raid_mtbf.py
Created July 11, 2011 22:54
A RAID MTBF calculator
#redundancy is the max number of survivable failures, e.g. 1 for RAID 5
#mtbf_array is an array of either actual mean-time-between-failures, or a nested RAID array
# RAID([100]*7,2) #7 disk RAID 6
# RAID([RAID([100]*3,1),RAID([1000]*3,1)],0) # RAID 50, 2 arrays of 3
# RAID([100,100,50,50],1) #RAID 5 with varying reliabilities
from random import random
class RAID(object):
@bnyeggen
bnyeggen / multiprocess_with_instance_methods.py
Created July 16, 2011 14:17
Example showing how to use instance methods with the multiprocessing module
from multiprocessing import Pool
from functools import partial
# NOTE(review): gist preview — indentation was lost in scraping, and the function
# is truncated here (the paired _unpickle_method and the copyreg/copy_reg
# registration that would make this usable are not shown).
def _pickle_method(method):
# Pull apart a Python-2 bound method: im_func/im_self/im_class are Py2-only
# attributes giving the underlying function, the bound instance, and the class.
func_name = method.im_func.__name__
obj = method.im_self
cls = method.im_class
# A name like '__foo' (leading dunder, no trailing dunder) is name-mangled and
# stored on the class as '_ClassName__foo'; rebuild that mangled attribute name —
# presumably so the unpickling side can look it up again (receiver not shown).
if func_name.startswith('__') and not func_name.endswith('__'): #deal with mangled names
cls_name = cls.__name__.lstrip('_')
func_name = '_' + cls_name + func_name
@kachok
kachok / listing.py
Created July 29, 2011 17:11
Quick and dirty way to get 1.usa.gov click archive
import urllib
import re
import time
# Fetch the Apache autoindex page for the bitly click archive; the query
# string (?C=M;O=D) asks for a listing sorted by modification time, newest first.
response = urllib.urlopen('http://bitly.measuredvoice.com/bitly_archive/?C=M;O=D')
data = response.read()
# Archive file names follow the pattern: usagov_bitly_data2011-07-29-1311919454
@xim
xim / cluster_example.py
Created October 11, 2011 20:19
K-Means clustering by Euclidean distance, yay!
import sys
import numpy
from nltk.cluster import KMeansClusterer, GAAClusterer, euclidean_distance
import nltk.corpus
from nltk import decorators
import nltk.stem
# Build the shared NLP helpers once at module load: a single English Snowball
# stemmer whose bound stem method is reused everywhere, and the English
# stopword list as a set for O(1) membership tests.
_english_stemmer = nltk.stem.EnglishStemmer()
stemmer_func = _english_stemmer.stem
stopwords = set(nltk.corpus.stopwords.words('english'))
@paulmwatson
paulmwatson / ds_to_gft.rb
Created November 23, 2011 17:49
Records DataSift Twitter interaction stream to Google Fusion Table
# Description: Consume a DataSift stream and save Twitter interactions to a Google Fusion Table
# Author: Paul M. Watson <paul.watson@storyful.com>
# Date: 2011/11/28
# Usage:
# ruby ds_to_gft.rb <DataSift stream id hash>
# config.yml should contain;
# datasift:
# username: datasift username
# api_key: datasift api key
# googlefusiontables:
@steipete
steipete / iOSDocumentMigrator.m
Created December 6, 2011 15:21
Helps migrate documents between iOS <= 5.0 and >= 5.0.1 to comply with Apple's iCloud guidelines. Follow @steipete on Twitter for updates.
#include <sys/xattr.h>
/// Tags the file at filePath with the "com.apple.MobileBackup" extended
/// attribute so it is excluded from iCloud backup (pre-5.1 mechanism).
/// NOTE(review): the setxattr return value is ignored — failures are silent.
+ (void)addSkipBackupAttributeToFile:(NSString *)filePath {
    u_int8_t attrValue = 1;
    const char *fsPath = [filePath fileSystemRepresentation];
    setxattr(fsPath, "com.apple.MobileBackup", &attrValue, 1, 0, 0);
}
/// Returns the legacy storage path, used when the com.apple.MobileBackup file attribute is not available.
+ (NSString *)legacyStoragePath {
//
// MNDocumentConflictResolutionViewController.h
// MindNodeTouch
//
// Created by Markus Müller on 15.12.11.
// Copyright (c) 2011 __MyCompanyName__. All rights reserved.
//
#import <UIKit/UIKit.h>
@class MNDocumentReference;
@michaelaguiar
michaelaguiar / d3donut.js
Created December 17, 2011 00:37
d3.js donut chart test
// Percentage of the donut to fill; donutFull is the complementary slice so the
// two values always sum to 100.
var donutVal = 85;
var donutFull = 100 - donutVal;
// Two arc colors: [filled slice, remainder] — presumably fed to a d3 ordinal
// color scale; verify against the chart-drawing code (not shown here).
var d3_category_socialmedia = ["#0054a6", "#dbdbdb"];
if(donutVal < 50) {
donutVal = -donutVal;
donutFull = -donutFull;
@chrishamant
chrishamant / s3_multipart_upload.py
Created January 3, 2012 19:29
Example of Parallelized Multipart upload using boto
#!/usr/bin/env python
"""Split large file into multiple pieces for upload to S3.
S3 only supports 5Gb files for uploading directly, so for larger CloudBioLinux
box images we need to use boto's multipart file support.
This parallelizes the task over available cores using multiprocessing.
Usage:
s3_multipart_upload.py <file_to_transfer> <bucket_name> [<s3_key_name>]