Skip to content

Instantly share code, notes, and snippets.

@xhit
Last active September 16, 2021 04:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xhit/472a244ce5107097973055f21dd8f326 to your computer and use it in GitHub Desktop.
Save xhit/472a244ce5107097973055f21dd8f326 to your computer and use it in GitHub Desktop.
# Connection to the source file phone.csv (comma-delimited CSV).
[[connections]]
id = 'csv-source'
type = 'csv'
path = 'path/to/phone.csv'
delimiter = ','
# Connection to the destination file phoneprocessed.csv (comma-delimited CSV).
[[connections]]
id = 'csv-dest'
type = 'csv'
path = 'path/to/phoneprocessed.csv'
delimiter = ','
[[jobs]]
id = 'job_phone'
type = 'dataflow'
# Omit the first row because it is the header.
omit_rows = 1

  [jobs.source_config]
  connection_id = 'csv-source'

  [jobs.destination_config]
  connection_id = 'csv-dest'
  # The column names will be the values of the column_destination keys.
  columns_name_in_first_row = true

  [[jobs.mapping]]
  column_source = '0'
  # Destination column name.
  column_destination = 'id_processed'

  [[jobs.mapping]]
  column_source = '1'
  # Destination column name.
  column_destination = 'number_processed'

  # This mapping's source is an expression rather than a plain column
  # reference (see source_is_expression).
  [[jobs.mapping]]
  column_source = "regexReplace(mapping.string.1,'[^0-9]','')"
  source_is_expression = true
  # Destination column name.
  column_destination = 'number_cleaned'

  # Omit rows whose value, once every non-digit character has been removed,
  # is not exactly 10 characters long.
  [jobs.skip]
  rule = "len(regexReplace(mapping.string.1,'[^0-9]','')) != 10"
# Connection to the source file phone.csv (comma-delimited CSV).
[[connections]]
id = 'csv-source'
type = 'csv'
path = 'path/to/phone.csv'
delimiter = ','
# Connection to the destination file phoneprocessed.csv (comma-delimited CSV).
[[connections]]
id = 'csv-dest'
type = 'csv'
path = 'path/to/phoneprocessed.csv'
delimiter = ','
[[jobs]]
id = 'job_phone'
type = 'dataflow'
# Omit the first row because it is the header.
omit_rows = 1

  [jobs.source_config]
  connection_id = 'csv-source'

  [jobs.destination_config]
  connection_id = 'csv-dest'
  # The column names will be the values of the column_destination keys.
  columns_name_in_first_row = true

  [[jobs.mapping]]
  column_source = '0'
  # Destination column name.
  column_destination = 'id_processed'

  [[jobs.mapping]]
  column_source = '1'
  # Destination column name.
  column_destination = 'number_processed'

  # This mapping's source is an expression rather than a plain column
  # reference (see source_is_expression).
  [[jobs.mapping]]
  column_source = "regexReplace(mapping.string.1,'[^0-9]','')"
  source_is_expression = true
  # Destination column name.
  column_destination = 'number_cleaned'

  # Omit rows whose value, once every non-digit character has been removed,
  # is not exactly 10 characters long.
  [jobs.skip]
  rule = "len(regexReplace(mapping.string.1,'[^0-9]','')) != 10"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment