import time

import boto3
# Glue client shared by every call in this script. It is created without
# explicit region or credential arguments.
client = boto3.client('glue')

# The single S3 prefix the crawler scans; the exclusion list is empty.
_sales_csv_target = {
    'Path': 's3://ejlp12-etl-demo-bucket/data/csv',
    'Exclusions': [],
}

# Apply schema updates and deletions detected by the crawler to the catalog.
_schema_change_policy = {
    'UpdateBehavior': 'UPDATE_IN_DATABASE',
    'DeleteBehavior': 'DELETE_FROM_DATABASE',
}

# Register the crawler definition. This call does not run the crawler.
# NOTE(review): 'sales-cvs' may be a typo for 'sales-csv'; it is left unchanged
# because the update_table call in this script uses the same database name.
#
# Optional argument left disabled by the original author; it sets the
# partition AddOrUpdateBehavior to InheritFromTable:
#   Configuration='{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }'
response = client.create_crawler(
    Name='SalesCSVCrawler',
    Role='AWSGlueServiceRoleDefault',
    DatabaseName='sales-cvs',
    Description='Crawler for generated Sales schema',
    Targets={'S3Targets': [_sales_csv_target]},
    SchemaChangePolicy=_schema_change_policy,
)
# Kick off a crawl. start_crawler only requests the run and returns
# immediately; the crawl itself proceeds asynchronously inside Glue.
response = client.start_crawler(
    Name='SalesCSVCrawler'
)

# Block until the crawl has finished so that anything running after this
# point sees the catalog tables the crawler produced. The crawler state goes
# RUNNING -> STOPPING -> READY; READY means the run is over.
_CRAWLER_TIMEOUT_SECONDS = 30 * 60
_CRAWLER_POLL_SECONDS = 15
_crawler_deadline = time.monotonic() + _CRAWLER_TIMEOUT_SECONDS
while True:
    _crawler_info = client.get_crawler(Name='SalesCSVCrawler')['Crawler']
    if _crawler_info['State'] == 'READY':
        break
    if time.monotonic() >= _crawler_deadline:
        raise TimeoutError(
            'Crawler SalesCSVCrawler did not finish within '
            f'{_CRAWLER_TIMEOUT_SECONDS} seconds '
            f"(last state: {_crawler_info['State']})"
        )
    time.sleep(_CRAWLER_POLL_SECONDS)

# Fail loudly if the run ended badly instead of continuing with a missing or
# stale table definition.
_last_crawl = _crawler_info.get('LastCrawl', {})
if _last_crawl.get('Status') in ('FAILED', 'CANCELLED'):
    raise RuntimeError(
        f"Crawler SalesCSVCrawler ended with status {_last_crawl['Status']}: "
        f"{_last_crawl.get('ErrorMessage', 'no error message reported')}"
    )
# Switch the 'csv' table to the OpenCSVSerde.
#
# update_table REPLACES the stored table definition with TableInput, so
# sending only the SerDe would discard the columns, S3 location and
# input/output formats already recorded in the catalog. Fetch the current
# definition first and change only what is needed.
# The table must already exist; presumably it is the one the crawler creates
# for the .../data/csv prefix -- verify the name in the Glue console.
_existing_table = client.get_table(
    DatabaseName='sales-cvs',
    Name='csv',
)['Table']

# get_table returns read-only fields (DatabaseName, CreateTime, UpdateTime,
# CreatedBy, ...) that update_table rejects, so copy only the keys that
# TableInput accepts.
_TABLE_INPUT_KEYS = (
    'Name',
    'Description',
    'Owner',
    'LastAccessTime',
    'LastAnalyzedTime',
    'Retention',
    'StorageDescriptor',
    'PartitionKeys',
    'ViewOriginalText',
    'ViewExpandedText',
    'TableType',
    'Parameters',
    'TargetTable',
)
_table_input = {
    key: _existing_table[key]
    for key in _TABLE_INPUT_KEYS
    if key in _existing_table
}
_table_input['Name'] = 'csv'
_table_input['Description'] = 'Table Sales'

# Keep the existing storage descriptor and override only its SerDe settings.
_storage_descriptor = dict(_table_input.get('StorageDescriptor', {}))
_storage_descriptor['SerdeInfo'] = {
    'Name': 'OpenCSVSerde',
    'SerializationLibrary': 'org.apache.hadoop.hive.serde2.OpenCSVSerde',
    'Parameters': {
        'separatorChar': ','
    }
}
_table_input['StorageDescriptor'] = _storage_descriptor

response = client.update_table(
    DatabaseName='sales-cvs',
    TableInput=_table_input
)
Last active
March 22, 2022 18:29
-
-
Save ejlp12/30d67c07bf9e46b98a350569976f08aa to your computer and use it in GitHub Desktop.
# AWS Glue: create a crawler, run the crawler, and update the resulting table to use "org.apache.hadoop.hive.serde2.OpenCSVSerde".
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment