Skip to content

Instantly share code, notes, and snippets.

@zimmerst
Created June 1, 2016 12:54
Show Gist options
  • Save zimmerst/3aef247b3940323a6765fd648a42a23a to your computer and use it in GitHub Desktop.
Save zimmerst/3aef247b3940323a6765fd648a42a23a to your computer and use it in GitHub Desktop.
Find orphaned and duplicate job instances (report only; nothing is removed yet).
# Report JobInstance records that are orphans or duplicates.
#
# * orphan    -- an instance whose ``job`` is not among ``Job.objects.all()``
# * duplicate -- a non-orphan instance whose ``instanceId`` was already seen
#                on an earlier non-orphan instance
#
# Nothing is deleted; the script only prints counts and leaves the offending
# instances in the module-level lists ``orphans`` and ``duplicates``.
from sys import exit as sys_exit

from progressbar import ProgressBar, ETA, Percentage, Bar

from DmpWorkflow.core.models import JobInstance, Job
from DmpWorkflow.core import db

widgets = [Percentage(), Bar(), ETA()]

db.connect()

all_jobs = Job.objects.all()

# Cheap pre-check: if the per-job instance counts add up to the total number
# of instances, the script reports a clean state and stops.
# NOTE(review): equal counts show that every instance is attached to a known
# job; it is not obvious that this also rules out repeated instanceIds --
# confirm before relying on the message below.
n_registered_instances = sum(
    JobInstance.objects.filter(job=j).count() for j in all_jobs
)
n_instances_total = JobInstance.objects.all().count()
if n_registered_instances == n_instances_total:
    print('no duplicates or orphans found, returning')
    sys_exit()

instances = JobInstance.objects.all()
print('found %i instances' % n_instances_total)

pb = ProgressBar(widgets=widgets, maxval=n_instances_total)
duplicates = []
orphans = []
# instanceIds of the instances accepted so far. A set keeps the membership
# test constant-time instead of scanning a growing list for every instance.
# Assumes instanceId values are hashable (e.g. numbers/strings) -- TODO confirm.
# NOTE(review): ids are compared across all jobs, not per job -- confirm that
# this is the intended notion of "duplicate".
good_instances = set()

pb.start()
for i, inst in enumerate(instances):
    if inst.job not in all_jobs:
        orphans.append(inst)
    elif inst.instanceId in good_instances:
        duplicates.append(inst)
    else:
        good_instances.add(inst.instanceId)
    # Advance the bar for every instance, including orphans and duplicates,
    # so the display reaches 100% regardless of how instances are classified.
    pb.update(i + 1)
pb.finish()

if duplicates:
    print('found %i duplicates' % len(duplicates))
if orphans:
    print('found %i orphans' % len(orphans))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment