Creates time based Glue partitions given time range.
Keep in mind that you don't need data to add partitions. So, you can create partitions for a whole year and add the data to S3 later.
# Parallelly download all aws-lambda functions | |
# Assumes you have ran `aws configure` and have output-mode as "text" | |
# Works with "aws-cli/1.16.72 Python/3.6.7 Linux/4.15.0-42-generic botocore/1.12.62" | |
download_code () { | |
local OUTPUT=$1 | |
aws lambda get-function --function-name $OUTPUT | head -n 1 | cut -f 2 | xargs wget -O ./lambda_functions/$OUTPUT.zip | |
} | |
mkdir lambda_functions | |
for run in $(aws lambda list-functions | cut -f 6 | xargs); |
CREATE TEMP FUNCTION accent2latin(word STRING) AS | |
(( | |
WITH lookups AS ( | |
SELECT | |
'ã,ç,æ,œ,á,é,í,ó,ú,à,è,ì,ò,ù,ä,ë,ï,ö,ü,ÿ,â,ê,î,ô,û,å,ø,Ø,Å,Á,À,Â,Ä,È,É,Ê,Ë,Í,Î,Ï,Ì,Ò,Ó,Ô,Ö,Ú,Ù,Û,Ü,Ÿ,Ç,Æ,Œ,ñ' AS accents, | |
'a,c,ae,oe,a,e,i,o,u,a,e,i,o,u,a,e,i,o,u,y,a,e,i,o,u,a,o,O,A,A,A,A,A,E,E,E,E,I,I,I,I,O,O,O,O,U,U,U,U,Y,C,AE,OE,n' AS latins | |
), | |
pairs AS ( | |
SELECT accent, latin FROM lookups, | |
UNNEST(SPLIT(accents)) AS accent WITH OFFSET AS p1, |
CREATE TEMP FUNCTION accent2latin(word STRING) AS | |
(( | |
WITH lookups AS ( | |
SELECT | |
'ã,ç,æ,œ,á,é,í,ó,ú,à,è,ì,ò,ù,ä,ë,ï,ö,ü,ÿ,â,ê,î,ô,û,å,ø,Ø,Å,Á,À,Â,Ä,È,É,Ê,Ë,Í,Î,Ï,Ì,Ò,Ó,Ô,Ö,Ú,Ù,Û,Ü,Ÿ,Ç,Æ,Œ,ñ' AS accents, | |
'a,c,ae,oe,a,e,i,o,u,a,e,i,o,u,a,e,i,o,u,y,a,e,i,o,u,a,o,O,A,A,A,A,A,E,E,E,E,I,I,I,I,O,O,O,O,U,U,U,U,Y,C,AE,OE,n' AS latins | |
), | |
pairs AS ( | |
SELECT accent, latin FROM lookups, | |
UNNEST(SPLIT(accents)) AS accent WITH OFFSET AS p1, |
import boto3 | |
table_filter = 'temp-cap' | |
dynamo = boto3.client('dynamodb') | |
tables = dynamo.list_tables()['TableNames'] | |
tables = [t for t in tables if table_filter in t] | |
for t in tables: | |
try: | |
dynamo.delete_table(TableName=t) | |
except: | |
continue |
-- Delete *.csv.metadata | |
-- aws s3 rm s3://... --recursive --exclude '*.csv' | |
CREATE EXTERNAL TABLE `osm_pems_ids`( | |
`osm_id` bigint COMMENT '', | |
`sensor_id` string COMMENT '', | |
`neigh_level` bigint COMMENT '') | |
ROW FORMAT SERDE | |
'org.apache.hadoop.hive.ql.io.orc.OrcSerde' | |
STORED AS INPUTFORMAT |
# update-function-code-aws-lambda.sh <function path> <lambda-function-name> | |
# make sure that the aws-cli is configured in the same region and with a user with permission | |
rm temp.zip | |
echo ------ Zipping -------- | |
cp $1 lambda_function.py | |
zip temp.zip lambda_function.py | |
rm lambda_function.py | |
echo ------ Updating Lambda ------ | |
aws lambda update-function-code --function-name $2 --zip-file "fileb://temp.zip" |
rm -r python | |
rm lambda_layer | |
mkdir python | |
printf "[install]\nprefix=" > ~/.pydistutils.cfg | |
pip3.7 install $1 -t python/ # insert any pip available module, repeat if necessary | |
printf "" > ~/.pydistutils.cfg | |
zip -r lambda_layer.zip ./python | |
aws s3 cp lambda_layer.zip s3://config-lambda/layers/$1/ | |
aws lambda publish-layer-version \ | |
--layer-name $1 \ |
SELECT | |
'{"type":"LineString", "coordinates":' || | |
'[' || array_join(transform(line, loc -> '[' || CAST(loc.x AS VARCHAR) || ',' || CAST(loc.y AS VARCHAR) || ']'), ',') || ']}' | |
FROM test.test |
Creates time based Glue partitions given time range.
Keep in mind that you don't need data to add partitions. So, you can create partitions for a whole year and add the data to S3 later.
CREATE TABLE waze.polygons_geo | |
WITH ( | |
external_location = 's3://...', | |
format = 'Parquet') AS | |
WITH dataset AS ( | |
SELECT | |
polygons | |
FROM waze.polygons) | |
SELECT | |
pol.polygon, |