Last active
August 29, 2015 13:57
-
-
Save dollschasingmen/9416004 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns tests.gamesavetaptest
  "Bare-bones Cascalog job: reads delimited gamesave records through
  `cascalog.more-taps/hfs-delimited` and emits a single `!cnt` field computed
  with `cascalog.logic.ops/!count` over `!user_id`."
  (:require
    [cascalog.api :refer :all]
    [cascalog.logic.ops :as c]
    [cascalog.more-taps :as mt]))
;; fields and classes to coerce to | |
(def vars
  "Ordered Cascalog variable names for the columns of a gamesave record.
  Passed to the delimited tap as `:outfields` and used as the output variables
  of the tap predicate in the query. Positions pair one-to-one with `classes`."
  ["!event_id" "!user_id" "!app_name" "!client_platform" "!account_state"
   "!game_id" "!game_result_id" "!score" "!is_training" "!metadata" "!bpis"
   "!created_at" "!time_id" "!date_id" "!milliseconds" "!yyyy" "!dd" "!mm"
   "!event_created_at" "!event_time_id" "!event_date_id" "!event_milliseconds"
   "!event_yyyy" "!event_dd" "!event_mm"
   "!user_level" "!session_level" "!user_agent"])
(def classes
  "Java classes handed to the delimited tap as `:classes`; one entry per
  element of `vars`, in the same order (the trailing comment on each line names
  the field at that position)."
  [Long      ; !event_id
   Long      ; !user_id
   String    ; !app_name
   String    ; !client_platform
   String    ; !account_state
   Integer   ; !game_id
   Long      ; !game_result_id
   Integer   ; !score
   Boolean   ; !is_training
   String    ; !metadata
   String    ; !bpis
   String    ; !created_at
   Integer   ; !time_id
   Integer   ; !date_id
   Long      ; !milliseconds
   Integer   ; !yyyy
   Integer   ; !dd
   Integer   ; !mm
   String    ; !event_created_at
   Integer   ; !event_time_id
   Integer   ; !event_date_id
   Long      ; !event_milliseconds
   Integer   ; !event_yyyy
   Integer   ; !event_dd
   Integer   ; !event_mm
   Integer   ; !user_level
   Integer   ; !session_level
   String])  ; !user_agent
;; query | |
(defn gamesave-count-test
  "Executes a one-field count query over the gamesave data and sinks it.

  Arguments:
    out  - sink tap the query result is written to via `?-`.
    root - base path given to `mt/hfs-delimited`; the tap is built with source
           pattern `/section=*/*/*`, `:strict? false`, and the `vars` /
           `classes` field definitions.

  The query binds every column in `vars` from the tap and emits `!cnt`, the
  result of `c/!count` applied to `!user_id`."
  [out root]
  (let [tap   (mt/hfs-delimited root
                                :strict? false
                                :source-pattern "/section=*/*/*"
                                :outfields vars
                                :classes classes)
        query (<- [!cnt]
                  (tap :>> vars)
                  (c/!count !user_id :> !cnt))]
    (?- out query)))
;; local test | |
(def local-dev-path
  "Filesystem root of the gamesave test data used by the local run."
  "/Users/axue/lumos/data/lumoscalog_test_data/views/gamesave_unpartitioned")
(defn local-test-gamesave-count-test
  "Runs `gamesave-count-test` against `local-dev-path`, using the `(stdout)`
  tap as the sink."
  []
  (gamesave-count-test (stdout) local-dev-path))
;; main | |
;; Command-line entry point (launched as main class tests.gamesavetaptest.RunTest).
;; Reads the gamesave data rooted at `in-path` and writes the count to an
;; `hfs-textline` sink at `out-path` created with `:sinkmode :replace`.
(defmain RunTest
  [in-path out-path]
  (gamesave-count-test (hfs-textline out-path :sinkmode :replace) in-path))
CLI invocation for this job:
elastic-mapreduce --create --name prod_test_lc2_baretest \
  --log-uri s3://lumos-data-dump-dev01/prod_tests/log/2014_03_06/lumoscalog2 \
  --ami-version latest --hadoop-version 1.0.3 \
  --num-instances 30 \
  --instance-type c1.xlarge \
  --master-instance-type c1.xlarge \
  --jar s3://lumos-data-dump-prod01/scripts/lumoscalog-2.0.0-standalone.jar \
  --main-class tests.gamesavetaptest.RunTest \
  --arg s3://lumos-data-dump-prod01/views/gamesave_sectioned_2 \
  --arg s3://lumos-data-dump-dev01/andy/out/prod_test_lc2_baretest
With 30 c1.xlarge instances we should be able to get through the gamesave data in roughly 30 minutes; instead, the job fails on mapper timeouts.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Bare-bones test with no dependencies outside of Cascalog 2.0 / Cascading 2.2.
This job runs extremely slowly, and the mappers time out.