# Tarrasch/stream_count.py

## stream_count.py
import datetime
import luigi
from spotify.luigi.crunch import ScrubJobTask, load_avsc
from spotify.luigi import HdfsTarget
from spotify.luigi.external_shrek_anonym import CreateEndSongCleaned


class SampleEndSongSubset(luigi.ExternalTask):
    """Externally provided HDFS dataset used as input by the stream-count example.

    Being a ``luigi.ExternalTask``, this class defines no run step of its
    own; it only declares where the data is expected to be found.
    """

    def output(self):
        """Return the HdfsTarget pointing at the example input directory."""
        dataset_path = "/user/spotify-analytics-data/examples/data_pipeline_crunch/stream_count_anonym"
        return HdfsTarget(dataset_path)


class Example1StreamCountJob(ScrubJobTask):
    """
    Example stream-count job built on ScrubJobTask.

    Launch it from the published maven artifact:
    > greaserun --runner luigi com.spotify.data:spotify-data-crunch:LATEST --module stream_count --task Example1StreamCountJob
    or from a local build (uploaded to your edgenode):
    > greaserun --runner luigi myartifaaaaaact-0.1.2.3.4.5-jar-with-dependencies.jar --module stream_count --task Example1StreamCountJob
    """

    def main_class(self):
        """Return the dotted name of the job's main class.

        NOTE(review): presumably the JVM entry point that ScrubJobTask
        launches -- confirm against ScrubJobTask.
        """
        entry_point = "mygrooooooooooooooooup.pipeline.Example1StreamCountJob"
        return entry_point

    def requires(self):
        """Return the upstream tasks as a dict keyed by input name."""
        upstream = dict(input=SampleEndSongSubset())
        return upstream

    def output(self):
        """Return the 'stream_count' HdfsTarget carrying the ExamplePlaysByCountry schema."""
        avro_schema = load_avsc("ExamplePlaysByCountry.avsc")
        return HdfsTarget('stream_count', schema=avro_schema)
	import datetime
	import luigi
	from spotify.luigi.crunch import ScrubJobTask, load_avsc
	from spotify.luigi import HdfsTarget
	from spotify.luigi.external_shrek_anonym import CreateEndSongCleaned


	class SampleEndSongSubset(luigi.ExternalTask):
	    """HDFS dataset produced outside this module and consumed by the example job.

	    The class subclasses ``luigi.ExternalTask`` and defines only ``output``,
	    i.e. it declares where the data lives rather than how to build it.
	    """
	    # NOTE(review): an identically named class is defined earlier in this
	    # file; this copy had lost its indentation -- confirm which copy is
	    # meant to stay.

	    def output(self):
	        """Return the HdfsTarget for the example input directory."""
	        return HdfsTarget("/user/spotify-analytics-data/examples/data_pipeline_crunch/stream_count_anonym")


	class Example1StreamCountJob(ScrubJobTask):
	    """
	    Example ScrubJobTask that reads SampleEndSongSubset and writes 'stream_count'.

	    You can run this example from maven artifact:
	    > greaserun --runner luigi com.spotify.data:spotify-data-crunch:LATEST --module stream_count --task Example1StreamCountJob
	    or using your local build (uploaded to your edgenode):
	    > greaserun --runner luigi myartifaaaaaact-0.1.2.3.4.5-jar-with-dependencies.jar --module stream_count --task Example1StreamCountJob
	    """
	    # NOTE(review): an identically named class is defined earlier in this
	    # file; this copy had lost its indentation -- confirm which copy is
	    # meant to stay.

	    def main_class(self):
	        """Return the dotted class name for this job.

	        NOTE(review): presumably the JVM main class ScrubJobTask runs --
	        confirm against ScrubJobTask.
	        """
	        return "mygrooooooooooooooooup.pipeline.Example1StreamCountJob"

	    def requires(self):
	        """Return the upstream dependencies, keyed by input name."""
	        return {
	            "input": SampleEndSongSubset(),
	        }

	    def output(self):
	        """Return the 'stream_count' HdfsTarget with the ExamplePlaysByCountry schema."""
	        return HdfsTarget('stream_count', schema=load_avsc("ExamplePlaysByCountry.avsc"))