This is the MapReduce job we use to transform datastore backups into JSON files, which we then load into BigQuery:
mr_pipeline = mapreduce_pipeline.MapperPipeline(
'input_reader': {
'files': list_backup_files(kind, backup_date),
'output_writer': {
'filesystem': 'gs',
'mime_type': 'text/plain',
'output_sharding': 'input',
'gs_bucket_name': get_output_path(kind, backup_date),
'backup_date': backup_date.encode('UTF-8'),
'kind': kind.encode('UTF-8'),
