Skip to content

Instantly share code, notes, and snippets.

@darksidelemm
Created November 3, 2018 08:26
Show Gist options
  • Save darksidelemm/3fbf871bcedf63756f204ee96b493918 to your computer and use it in GitHub Desktop.
Save darksidelemm/3fbf871bcedf63756f204ee96b493918 to your computer and use it in GitHub Desktop.
Find Radiosonde-Related Payload Configuration documents within the Habitat DB.
#!/usr/bin/env python
#
# Habitat Radiosonde Cleanup Utility
#
# Mark Jessop 2018-11
#
# Search for payload configuration documents which have been automatically generated by
# radiosonde_auto_rx. Eventually, delete the ones that are older than X weeks.
#
import couchdb
import re
import time
# Location of the Habitat CouchDB server, and the database within it to scan.
HABITAT_SERVER = 'http://habitat.habhub.org/'
HABITAT_DB = 'habitat'

# Callsign filter - All newer radiosonde callsigns have been prefixed with 'RS_'
CALLSIGN_FILTER = 'RS_'

# Known radiosonde serial number format, used to catch payload names which do
# not carry the CALLSIGN_FILTER prefix. Compiled once here rather than on every
# row. Used with .match(), so it is anchored at the start of the name only.
SERIAL_NUMBER_REGEX = re.compile(r'[E-Z][0-5][\d][1-7]\d{4}')

# Only select payload docs older than 2 weeks.
AGE_FILTER = 14*24*60*60  # 2 weeks, in seconds.

# Documents with a creation time earlier than this are considered old enough.
# NOTE(review): assumes the view's time key is a Unix timestamp in seconds,
# since it is compared against a time.time()-derived value - confirm.
_max_timestamp = int(time.time() - AGE_FILTER)

couch = couchdb.Server(HABITAT_SERVER)
db = couch[HABITAT_DB]

# Store for payload IDs.
payload_docs = []

# Count how many payload docs we have processed.
_id_count = 0

# Walk the whole view in batches of 100 rows.
for row in db.iterview('payload_configuration/name_time_created', batch=100):
    # Extract the name and time keys.
    _name = row.key[0]
    _time = int(row.key[1])

    # Determine if the name either starts with our filter, or matches a known
    # radiosonde serial number format.
    if _name.startswith(CALLSIGN_FILTER) or SERIAL_NUMBER_REGEX.match(_name):
        # Check if the document matches our minimum age criteria.
        if _time < _max_timestamp:
            # Append it to the list.
            payload_docs.append(row.id)

    # Indicate how many documents we have processed so far. This counts every
    # row seen, not just the matching ones.
    _id_count += 1
    if (_id_count % 1000) == 0:
        print("Processed %d docs." % _id_count)

# TODO: Delete the documents.

print("Found %d radiosonde payload documents (out of %d total)."% (len(payload_docs), _id_count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment