Last active
August 10, 2020 22:11
-
-
Save angrychimp/3b6efb260524a38f2c544b2ee83d0028 to your computer and use it in GitHub Desktop.
Create an Athena table from danielmiessler/SecLists
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Create an Athena table from danielmiessler/SecLists and run a sample query.
#
# Steps:
#   1. Create the S3 bucket that will hold the SecLists data (if necessary).
#   2. Download the current SecLists archive and sync it to that bucket.
#   3. Create an external Athena table over the bucket.
#   4. Run a sample query and show the first lines of its CSV result.
#
# Assumes that your AWS CLI default profile is set. If not, set the
# AWS_PROFILE environment variable.
#
# SECLISTS_BUCKET and ATHENA_OUTPUT_BUCKET may be overridden from the
# environment; otherwise the placeholder defaults below are used.
set -euo pipefail

readonly SECLISTS_BUCKET="${SECLISTS_BUCKET:-my-seclists-bucket}"
readonly ATHENA_OUTPUT_BUCKET="${ATHENA_OUTPUT_BUCKET:-my-athena-output-bucket}"

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Start an Athena query whose results go to ATHENA_OUTPUT_BUCKET.
# Globals:   ATHENA_OUTPUT_BUCKET (read)
# Arguments: $1 - SQL text to execute
# Outputs:   the query execution id on stdout
#######################################
start_query() {
  aws athena start-query-execution \
    --query-string "$1" \
    --result-configuration "OutputLocation=s3://${ATHENA_OUTPUT_BUCKET}/" \
    --output text
}

#######################################
# Poll an Athena query until it leaves the QUEUED/RUNNING states.
# Arguments: $1 - query execution id
# Outputs:   one '.' per poll, then the final state on its own line
# Returns:   0 if the final state is SUCCEEDED, non-zero otherwise
#######################################
wait_for_query() {
  local query_id=$1
  local state
  while true; do
    state=$(aws athena get-query-execution \
      --query-execution-id "$query_id" \
      --query 'QueryExecution.Status.State' \
      --output text)
    case "$state" in
      # A freshly submitted query is QUEUED before it is RUNNING; both
      # mean "not finished yet".
      QUEUED | RUNNING)
        printf '.'
        sleep 1
        ;;
      *)
        break
        ;;
    esac
  done
  printf '%s\n' "$state"
  [[ "$state" == "SUCCEEDED" ]]
}

# Create the bucket (if necessary)
# The listing is expected to fail when the bucket is missing, so its exit
# status is ignored and only the error text is inspected.
ls_output=$(aws s3 ls "s3://${SECLISTS_BUCKET}" 2>&1) || true
if [[ "$ls_output" == *'does not exist'* ]]; then
  mb_args=("s3://${SECLISTS_BUCKET}")
  # Only pass --region when one is configured; an empty value would break
  # the command line.
  region=$(aws configure get region) || region=''
  if [[ -n "$region" ]]; then
    mb_args+=(--region "$region")
  fi
  aws s3 mb "${mb_args[@]}"
fi

# Download the current copy of the SecLists datastore
# -f makes curl fail on HTTP errors instead of saving an error page as the zip;
# -o lets unzip overwrite a previous extraction without prompting.
curl -fLO https://github.com/danielmiessler/SecLists/archive/master.zip
unzip -o master.zip
aws s3 sync SecLists-master "s3://${SECLISTS_BUCKET}/"

# Create the Athena table
printf 'Creating table... '
create_sql="CREATE EXTERNAL TABLE IF NOT EXISTS seclists (pattern string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES ( 'serialization.format' = ',' ) LOCATION 's3://${SECLISTS_BUCKET}/';"
create_qid=$(start_query "$create_sql")
printf '%s\n' "$create_qid"
# The table must exist before anything can be selected from it.
wait_for_query "$create_qid" || die "CREATE TABLE query ${create_qid} did not succeed"

# Additional queries can now be executed
# Consider limiting queries to specific groups of content (avoiding README files for example)
# e.g.
printf 'Getting sample data... '
query="SELECT pattern, \"\$path\" as path FROM seclists WHERE \"\$path\" LIKE '%/Discovery/Web_Content/%' LIMIT 10"
qid=$(start_query "$query")
printf '%s\n' "$qid"
wait_for_query "$qid" || die "Sample query ${qid} did not succeed"

# The result CSV is named after the query execution id.
aws s3 cp "s3://${ATHENA_OUTPUT_BUCKET}/${qid}.csv" .
head -- "${qid}.csv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment