Skip to content

Instantly share code, notes, and snippets.

@mateuszkwiatkowski
Created November 30, 2022 14:09
Show Gist options
  • Save mateuszkwiatkowski/21a296da982230e963b66842c1ae6643 to your computer and use it in GitHub Desktop.
Save mateuszkwiatkowski/21a296da982230e963b66842c1ae6643 to your computer and use it in GitHub Desktop.
Posthog cleanup
#!/usr/bin/env python
import logging
import os
from datetime import datetime, timedelta

import django

# Django resolves DJANGO_SETTINGS_MODULE while setup() runs, so the fallback
# has to be in the environment *before* that call to have any effect.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "posthog.settings")
django.setup()

# Model imports are only possible once the app registry has been populated.
from posthog.models import Event, ElementGroup, Person  # noqa: E402
from django.utils import timezone  # noqa: E402

# Runtime configuration, all overridable through environment variables.
# Events older than this many days are removed.
max_age_days = int(os.getenv("POSTHOG_CLEANUP_OLDER_THAN_DAYS", 30))
# Number of rows deleted per DELETE statement.
step_size = int(os.getenv("POSTHOG_CLEANUP_BATCH_SIZE", 100))
# When enabled, everything is logged but nothing is deleted.
dry_run = os.getenv("POSTHOG_CLEANUP_DRY_RUN", "False").lower() in ["true", "yes", "1"]

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S ')
def get_events_older_than(older_than):
    """Return the primary keys of all events older than ``older_than`` days.

    Args:
        older_than: Age threshold in days; events whose ``timestamp`` lies
            before "now minus this many days" are selected.

    Returns:
        A lazy, flat ``values_list`` queryset of ``Event`` primary keys.
    """
    # timezone.now() yields Django's notion of the current instant directly
    # (aware when USE_TZ is enabled), which avoids building a naive local
    # datetime and converting it with make_aware() - a conversion that is
    # ambiguous or undefined for wall-clock times around DST transitions.
    cutoff = timezone.now() - timedelta(days=older_than)
    return Event.objects.filter(timestamp__lt=cutoff).values_list('pk', flat=True)
def get_non_referenced_event_groups():
    """Return the element groups whose hash is not used by any existing event.

    Returns:
        A lazy ``ElementGroup`` queryset excluding every group whose ``hash``
        still appears as ``elements_hash`` on at least one ``Event``.
    """
    referenced_hashes = (
        Event.objects
        # A NULL inside a SQL "NOT IN (...)" list makes the predicate match
        # nothing, so hash-less events must not contribute to the list.
        # NOTE(review): assumes elements_hash may be NULL; harmless otherwise.
        .exclude(elements_hash__isnull=True)
        # Drop any default ordering so DISTINCT applies to the hash alone.
        .order_by()
        .values_list('elements_hash', flat=True)
        # Many events can share one hash; fetch each value only once to keep
        # the IN list (and the memory used to build it) small.
        .distinct()
    )
    return ElementGroup.objects.exclude(hash__in=list(referenced_hashes))
def get_all_persons():
    """Return a queryset covering every ``Person`` row."""
    every_person = Person.objects.all()
    return every_person
def delete_items(item_type, items):
    """Delete the rows of ``item_type`` identified by ``items``.

    Does nothing except log a notice when the module-level ``dry_run`` flag
    is set.

    Args:
        item_type: Model class whose ``objects`` manager is used for the delete.
        items: Iterable of primary-key values and/or model instances.
    """
    if dry_run:
        logging.info("Skipping delete of items in dry run mode...")
        return
    # Callers hand in both plain primary keys and whole model instances
    # (slices of an unrestricted queryset). The id__in lookup needs raw key
    # values, so instances are reduced to their ``pk`` and anything without
    # a ``pk`` attribute is passed through unchanged.
    ids = [getattr(item, "pk", item) for item in items]
    item_type.objects.filter(id__in=ids).delete()
def delete_items_batched(item_type, items, logging_indent=6 * " "):
    """Delete ``items`` of ``item_type`` in chunks of ``step_size``, logging progress.

    Args:
        item_type: Model class; forwarded to ``delete_items`` and named in the log.
        items: Queryset, or any other iterable, of primary keys or model instances.
        logging_indent: Prefix placed in front of every log line.

    Raises:
        ValueError: If the module-level ``step_size`` is not a positive number.
    """
    if step_size <= 0:
        # A zero or negative batch size would never advance through the items.
        raise ValueError("step_size must be a positive integer, got %r" % (step_size,))

    # Resolve everything to primary keys exactly once: this fixes the set of
    # rows to delete up front and keeps only key values in memory instead of
    # fully loaded model instances.
    if hasattr(items, "values_list"):
        pks = list(items.values_list("pk", flat=True))
    else:
        pks = [getattr(item, "pk", item) for item in items]

    number_of_items = len(pks)
    logging.info("%sDeleting %d items of type %s using batches of %d size:", logging_indent, number_of_items,
                 item_type.__name__, step_size)

    # range() produces exactly the non-empty batches, so no trailing empty
    # delete is issued when the count is a multiple of the batch size.
    for start in range(0, number_of_items, step_size):
        delete_items(item_type, pks[start:start + step_size])
        done = min(start + step_size, number_of_items)
        # Report the share already processed; the last batch reports 100%.
        logging.info("%s %d%%", logging_indent, done * 100 // number_of_items)

    if not number_of_items:
        # Nothing to delete: still emit the closing progress line.
        logging.info("%s 100%%", logging_indent)
if __name__ == "__main__":
    # Script entry point: run the three cleanup stages in order and report
    # the total wall-clock duration at the end.
    logging.info("Running cleanup of PostHog...")
    began_at = datetime.now()

    # Stage 1: events past the retention window.
    logging.info(" - Deleting all events older than %d days:", max_age_days)
    stale_event_ids = get_events_older_than(max_age_days)
    delete_items_batched(Event, stale_event_ids)

    # Stage 2: every Person row.
    logging.info(" - Deleting all Person objects")
    persons = get_all_persons()
    delete_items_batched(Person, persons)

    # Stage 3: element groups orphaned by the event deletion above.
    logging.info(" - Deleting all elements and element groups not referenced by any event anymore:")
    orphaned_groups = get_non_referenced_event_groups()
    delete_items_batched(ElementGroup, orphaned_groups)

    elapsed = datetime.now() - began_at
    logging.info("Cleanup finished, total duration: %s", elapsed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment