Skip to content

Instantly share code, notes, and snippets.

# Copyright 2008 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
{ 'AirlineClassifier': { '20363': [ (0L, 0.8955554950240574),
(1L, 0.10443541423213792)]},
'CarrierClassifier': { '9E': [ (0L, 0.8955554950240574),
(1L, 0.10443541423213792)]},
'CarrierDelayClassifier': { '0-15': [(1L, 0.0082973853580048263)],
'15-30': [(1L, 0.0082973853580048263)],
'30-60': [(1L, 0.0082973853580048263)],
'<=0': [ (0L, 0.00099066773934030575),
(1L, 0.0082973853580048263)],
'> 1hr': [(1L, 0.0082973853580048263)]},
[config]
my_packages = python_spike
[buildout]
parts = sqlalchemy_repl test
develop = .
eggs = ${config:my_packages}
[sqlalchemy_repl]
recipe = zc.recipe.egg
# 0L means no delay
# 1L means delay
results_for_5M = { 'AirlineClassifier': [ { '19930': [ (0L, 0.75960811707313236),
(1L, 0.24038830082876111)]},
{ '20355': [ (0L, 0.83392410129691941),
(1L, 0.16607501321603604)]},
{ '19790': [ (0L, 0.83218253725971247),
(1L, 0.16781674431645197)]},
{ '19977': [ (0L, 0.80293842859590392),
public static class LineIndexerMapper extends MapReduceBase implements Mapper {
private final static Text word = new Text();
private final static Text summary = new Text();
public void map(WritableComparable key, Writable val,
OutputCollector output, Reporter reporter)
throws IOException {
String line = val.toString();
summary.set(key.toString() + ":" + line);
StringTokenizer itr = new StringTokenizer(line.toLowerCase());
while(itr.hasMoreTokens()) {
(ns org.parsimony-group
(:import [cascading.cascade Cascade CascadeConnector Cascades]
[cascading.flow Flow FlowConnector]
[cascading.operation Identity]
[cascading.operation.aggregator Count]
[cascading.operation.xml TagSoupParser XPathGenerator XPathOperation]
[cascading.operation.regex RegexFilter RegexGenerator RegexReplace RegexSplitter]
[cascading.pipe Each Pipe SubAssembly Every GroupBy]
[cascading.scheme TextLine SequenceFile]
[cascading.tap Hfs Lfs Tap]
(ns org.parsimony-group.runner
(:import [cascading.cascade Cascade CascadeConnector Cascades]
[cascading.flow Flow FlowConnector]
[cascading.operation Identity]
[cascading.operation.aggregator Count]
[cascading.operation.xml TagSoupParser XPathGenerator XPathOperation]
[cascading.operation.regex RegexFilter RegexGenerator RegexReplace RegexSplitter]
[cascading.pipe Each Pipe SubAssembly Every GroupBy]
[cascading.scheme TextLine SequenceFile]
[cascading.tap Hfs Lfs Tap]
(defn rolling-window
  "Returns a lazy sequence of every run of n consecutive items in coll,
  advancing one item at a time, or nil when coll holds fewer than n items.

  Only the first n items are inspected to decide whether another window
  exists, so each step costs O(n) instead of a full walk of the remaining
  input, and the function also works on infinite sequences."
  [coll n]
  (let [window (take n coll)]
    ;; `window` never holds more than n items, so for n >= 0 this is an
    ;; equality check; `>=` keeps the original result for negative n.
    (when (>= (count window) n)
      (lazy-seq
        (cons window
              (rolling-window (rest coll) n))))))
(defn rolling-window
  "Returns a lazy sequence of every run of n consecutive items in coll,
  advancing one item at a time, or nil when coll holds fewer than n items.

  Only the first n items are inspected to decide whether another window
  exists, so each step costs O(n) instead of a full walk of the remaining
  input, and the function also works on infinite sequences."
  [coll n]
  (let [window (take n coll)]
    ;; `window` never holds more than n items, so for n >= 0 this is an
    ;; equality check; `>=` keeps the original result for negative n.
    (when (>= (count window) n)
      (lazy-seq
        (cons window
              (rolling-window (rest coll) n))))))
;; Lazily applies `mean` (defined elsewhere) to each 5-item window that
;; `rolling-window` produces from `source-stream`.
(for [window (rolling-window source-stream 5)]
  (mean window))
(defn identityfunc
  "Returns x unchanged."
  [x]
  (identity x))
(defn isPositiveNumber
  "Returns true when num is greater than or equal to zero.
  Note: despite the name, zero is accepted."
  [num]
  (<= 0 num))
(defn prevFlightRange
  "Looks up :prevFlightRange inside the :totals entry of data.
  Returns nil when either key is absent."
  [data]
  (-> data :totals :prevFlightRange))
(defn custom []
{
:filter {:using isPositiveNumber :groupby prevFlightRange}
:each {:using identityfunc}