Created
January 2, 2024 21:21
-
-
Save Fluepke/2e55b9c364c6b65aece31891b54b220f to your computer and use it in GitHub Desktop.
Fetch crt.sh database as CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Database parameters | |
HOST="crt.sh" | |
PORT="5432" | |
USER="guest" | |
DB="certwatch" | |
# File to store the data | |
OUTPUT_FILE="certificates.csv" | |
# Chunk size | |
CHUNK_SIZE=50000 | |
# Initialize counter | |
OFFSET=0 | |
# Fetch the header only for the first chunk | |
psql -h "$HOST" -p "$PORT" -U "$USER" -d "$DB" -c "COPY (SELECT * FROM certificate ORDER BY id LIMIT 1) TO STDOUT WITH CSV HEADER" > "$OUTPUT_FILE" | |
# Download data in chunks | |
while : ; do | |
echo "Fetching chunk starting from offset $OFFSET..." | |
# Copy chunk to temporary file | |
TEMP_FILE=$(mktemp) | |
psql -h "$HOST" -p "$PORT" -U "$USER" -d "$DB" -c "COPY (SELECT * FROM certificate ORDER BY id OFFSET $OFFSET LIMIT $CHUNK_SIZE) TO STDOUT WITH CSV" | pv -l -s "${CHUNK_SIZE}" > "$TEMP_FILE" | |
# Check if the chunk is empty | |
if [ ! -s "$TEMP_FILE" ]; then | |
echo "No more data to fetch." | |
break | |
fi | |
# Append chunk to output file and remove temporary file | |
cat "$TEMP_FILE" >> "$OUTPUT_FILE" | |
rm "$TEMP_FILE" | |
# Increment offset | |
((OFFSET += CHUNK_SIZE)) | |
done | |
echo "Data fetching complete." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment