Skip to content

Instantly share code, notes, and snippets.

@drj42
drj42 / task_params.py
Created November 21, 2015 16:32
TaskParameter quirks
import luigi
class Foo(luigi.Task):
    """Luigi task that defines only a class-level ``message`` attribute."""

    # Plain class attribute (not a luigi Parameter).
    message = 'Foo'
class RunOnceTask(luigi.Task):
my_task = luigi.TaskParameter()
@drj42
drj42 / gist:a1ff0e57e11e930291da
Last active August 29, 2015 14:22
Load CSVs in Spark
"""
Launch pyspark with the flag:
--packages com.databricks:spark-csv_2.10:1.0.3
"""
from pyspark.sql import SQLContext
from pyspark.sql.types import StructField, StringType, StructType
def create_schema(fields):
""" Create a dataframe schema from a list of field names.
@drj42
drj42 / csvprocessor.py
Last active August 29, 2015 14:18 — forked from miku/csvprocessor.py
from luigi.format import Format
import csvkit
class CSVOutputProcessor(object):
"""
A simple CSV output processor to be hooked into Format's
`pipe_writer`.
If `cols` are given, the names are used as CSV header, otherwise no
explicit header is written.