Skip to content

Instantly share code, notes, and snippets.

@ilyaevseev
Created March 17, 2021 00:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ilyaevseev/82643dc011e439fe611c73a9e15106f6 to your computer and use it in GitHub Desktop.
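Fix esdedupe runtime errors: keep the removed-documents counter in `self.total` (initialized in `__init__`) and drop the stray `self` parameter from the module-level helpers `bytes_fmt` and `memusage` in utils.py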
diff --git a/esdedupe/esdedupe.py b/esdedupe/esdedupe.py
index 749615e..3e56e90 100755
--- a/esdedupe/esdedupe.py
+++ b/esdedupe/esdedupe.py
@@ -23,6 +23,7 @@ class Esdedupe:
def __init__(self):
self.log = getLogger('esdedupe')
+ self.total = 0
# Process documents returned by the current search/scroll
def build_index(self, docs_hash, unique_fields, hit):
@@ -145,7 +146,7 @@ class Esdedupe:
currStart = args.since
currEnd = args.since + timedelta(seconds=win)
- total = 0
+ self.total = 0
# scan & remove using sliding window
while currEnd < end:
docs = {} # avoid deleting same documents again and again
@@ -153,7 +154,7 @@ class Esdedupe:
args.window, to_es_date(currStart), to_es_date(currEnd)))
args.since = currStart
args.until = currEnd
- total += self.scan_and_remove(es, docs, pk, dupl, index, args)
+ self.total += self.scan_and_remove(es, docs, pk, dupl, index, args)
currStart += timedelta(seconds=win)
currEnd += timedelta(seconds=win)
@@ -162,11 +163,11 @@ class Esdedupe:
to_es_date(currStart), to_es_date(end)))
args.since = currStart
args.until = end
- total += self.scan_and_remove(es, docs, pk, dupl, index, args)
+ self.total += self.scan_and_remove(es, docs, pk, dupl, index, args)
else:
# "normal" index without timestamps
- total += self.scan_and_remove(es, docs, pk, dupl, index, args)
- self.log.info("Altogether {} documents were removed (including doc replicas)".format(total))
+ self.total += self.scan_and_remove(es, docs, pk, dupl, index, args)
+ self.log.info("Altogether {} documents were removed (including doc replicas)".format(self.total))
def scan(self, es, docs_hash, unique_fields, index, args):
i = 0
@@ -188,10 +189,10 @@ class Esdedupe:
if dupl == 0:
self.log.info("No duplicates found")
else:
- total = len(docs_hash)
+ self.total = len(docs_hash)
self.log.info(
"Found {:0,} duplicates out of {:0,} docs, unique documents: {:0,} ({:.1f}% duplicates)".format(
- dupl, dupl+total, total, dupl/(dupl+total)*100)
+ dupl, dupl+self.total, self.total, dupl/(dupl+self.total)*100)
)
if args.log_dupl:
diff --git a/esdedupe/utils.py b/esdedupe/utils.py
index 1a55ba7..f37ad2f 100644
--- a/esdedupe/utils.py
+++ b/esdedupe/utils.py
@@ -7,7 +7,7 @@ import psutil
import datetime
-def bytes_fmt(self, num, suffix='B'):
+def bytes_fmt(num, suffix='B'):
for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
@@ -15,7 +15,7 @@ def bytes_fmt(self, num, suffix='B'):
return "%.1f%s%s" % (num, 'Y', suffix)
-def memusage(self):
+def memusage():
process = psutil.Process(os.getpid())
rss = process.memory_info().rss
return bytes_fmt(rss)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment