Skip to content

Instantly share code, notes, and snippets.

@Wittline
Created June 23, 2021 19:44
Show Gist options
  • Save Wittline/44ce03080d26618027b235ed325d7da9 to your computer and use it in GitHub Desktop.
Save Wittline/44ce03080d26618027b235ed325d7da9 to your computer and use it in GitHub Desktop.
{
"steps":
[
{
"executor_memory": "18G",
"executor_cores": "4",
"description" : "Reading the input dataset and filtering",
"name": "step_0",
"guiid": "0",
"ActionOnFailure": "CANCEL_AND_WAIT",
"script_uri":"pyspark/pyspark_preprocessing_text.py",
"script_args":
{
"auto_generate_output": "1",
"output_uri": "None",
"format_output": ".parquet",
"external_input": "1",
"input_dependency_from_output_step": "0",
"from_step": "None",
"input_data": "s3://amazon-reviews-pds/parquet/product_category=Books",
"local_input": ""
}
},
{
"executor_memory": "18G",
"executor_cores": "4",
"description" : "Generating final dataset",
"name": "step_1",
"guiid": "1",
"ActionOnFailure": "CANCEL_AND_WAIT",
"script_uri":"pyspark/pyspark_grouping_words.py",
"script_args":
{
"auto_generate_output": "1",
"output_uri": "None",
"format_output": ".parquet",
"external_input": "0",
"input_dependency_from_output_step": "1",
"from_step": "step_0",
"input_data": "None",
"local_input": ""
}
},
{
"executor_memory": "18G",
"executor_cores": "4",
"description" : "Generating clouds",
"name": "step_2",
"guiid": "2",
"ActionOnFailure": "CANCEL_AND_WAIT",
"script_uri":"pyspark/generate_clouds.py",
"script_args":
{
"auto_generate_output": "1",
"output_uri": "None",
"format_output": ".png",
"external_input": "0",
"input_dependency_from_output_step": "1",
"from_step": "step_1",
"input_data": "None",
"local_input": "data/usa.png"
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment