Skip to content

Instantly share code, notes, and snippets.

@darrensapalo
Last active January 8, 2021 03:01
Show Gist options
  • Save darrensapalo/a31da9775fe614276746588ef58d794e to your computer and use it in GitHub Desktop.
Save darrensapalo/a31da9775fe614276746588ef58d794e to your computer and use it in GitHub Desktop.
Process log data from paper trail
#! /usr/bin/fish
# Step 1: Copy paste log file from paper trail
# Step 2: Save data into file, where each line looks like the following:
# Dec 20 22:22:11 staging[delivery-service-hasura-58f69d58f6-2cxhl {"type":"query-log","timestamp":"2020-12-21T06:22:11.043+0000","level":"info","detail":{"request_id":"616a3367629d8b436e3b0e79ba14f401","generated_sql":null,"query":{"variables":{"driver_id":"ea078cc9-28b8-40d6-8647-3141450fd683","location_details":{"uuid":"98ce4b1b-dfaa-4916-883e-a22724e48f05","is_moving":true,"odometer":6073.2,"activity":{"confidence":100,"type":"still"},"battery":{"is_charging":true,"level":0.99},"timestamp":"2020-12-21T06:22:10.410Z","coords":{"heading_accuracy":-1,"speed_accuracy":-1,"heading":-1,"latitude":26.1890645,"speed":-1,"altitude_accuracy":-1,"accuracy":1602,"altitude":-1,"longitude":50.5934393},"sample":true,"extras":{}}},"operationName":"UpdateDriverLocation","query":"mutation UpdateDriverLocation($driver_id: uuid!, $location_details: jsonb) {\n update_drivers(\n where: {id: {_eq: $driver_id}}\n _set: {location_details: $location_details}\n ) {\n affected_rows\n __typename\n }\n}\n"}}}
# Step 3: Edit input and output file
set INPUT_FILE ./raw-logs-jan6-7.log
set OUTPUT_FILE "$INPUT_FILE.location-only.log"
set PROCESSED_FILE "$INPUT_FILE.processed.json"
set POST_PROCESSED_FILE "$INPUT_FILE.postprocessed.json"
set FILTERED_FILE "$INPUT_FILE.filtered.json"
set CSV_FILE "$INPUT_FILE.location-only.csv"
# Step 4: Remove start of the line so that what will remain is the JSON object
sed -r "s/[-a-zA-Z0-9\d: ]+staging\[delivery-service-hasura-[-a-zA-Z0-9]+//g" \
$INPUT_FILE > $OUTPUT_FILE
# Step 5: Use `jq` to retrieve only the driver and the location details.
# Optional Step: Apply filters e.g. filter by selecting a driver: `0d0f93e3-b6c7-424e-879e-33efb19f78d2`, `a9755ec8-c46a-480c-92c2-02c7cb8211e5`, and `bca7db07-efea-452e-9d49-aca3cb16c77b`
cat $OUTPUT_FILE | jq '{ driver: .detail.query.variables.driver_id, location_details: .detail.query.variables.location_details }' | \
# jq 'select(.driver == "0d0f93e3-b6c7-424e-879e-33efb19f78d2") ' \
# jq 'select(.driver == "a9755ec8-c46a-480c-92c2-02c7cb8211e5") ' \
# jq 'select(.driver == "bca7db07-efea-452e-9d49-aca3cb16c77b") ' \
> $PROCESSED_FILE
# Step 6: Edit the JSON data shown per line into one JSON array. Add `,` after the `}` brace. Enclose in brackets.
# Remove new lines
tr -d '\n' < $PROCESSED_FILE > "$PROCESSED_FILE.trimmed"
# Add comma after }
awk '{ gsub("}{","},\n{"); print }' "$PROCESSED_FILE.trimmed" > $POST_PROCESSED_FILE
# Add [ at the start
sed -i '1s;^;[;' $POST_PROCESSED_FILE
# Append ] at the end
echo ']' >> $POST_PROCESSED_FILE
# Step 7: Postprocess to retrieve only three fields: latitude, longitude, and gps timestamp as csv.
#
# References: https://dev.to/dannylee8/using-jq-to-convert-json-to-csv-top-level-values-f14
cat $POST_PROCESSED_FILE | jq '.[] | [.location_details.coords.latitude, .location_details.coords.longitude, .location_details.timestamp] | @csv' > $CSV_FILE
# Append CSV header
sed -i '1s;^;latitude,longitude,timestamp\n;' $CSV_FILE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment