Skip to content

Instantly share code, notes, and snippets.

@jiapengjp
Created December 17, 2015 02:53
Show Gist options
  • Save jiapengjp/35f8f2980ed4efae27b1 to your computer and use it in GitHub Desktop.
Save jiapengjp/35f8f2980ed4efae27b1 to your computer and use it in GitHub Desktop.
#!/bin/python
import csv
import datetime
import json
import os.path
# Benchmark matrix: shard counts 1..10 and concurrency levels 1..5.
# The per-run Elasticsearch stats dumps and the upload log live under jsondir.
shards = range(1, 11)
concurrencies = range(1, 6)
jsondir = "testdir"


def _read_index_stats(stats_path):
    """Return (size_in_bytes, num_docs) for the "testtest" index in one stats file."""
    with open(stats_path) as stats_file:
        index_stats = json.load(stats_file)["indices"]["testtest"]
    return (index_stats["index"]["size_in_bytes"],
            index_stats["docs"]["num_docs"])


# One (size_in_bytes, num_docs) tuple per stats file. Shard count varies
# fastest, then the size step (1..14, written as 2, 4, ..., 28 in the file
# name -- presumably megabytes), then the concurrency level.
esstats = [
    _read_index_stats(
        os.path.join(jsondir, "%dm_%dshard_c%d.json" % (size * 2, s, c)))
    for c in concurrencies
    for size in range(1, 15)
    for s in shards
]
# Running state while walking upload.log. A new "round" starts whenever two
# consecutive rows are more than 20 seconds apart.
nrows = []            # enriched rows, in input order
last_ts = 0           # timestamp of the previous data row (0 = none seen yet)
shard_idx = 0         # index into shards for the current round
conc_idx = 0          # index into concurrencies for the current round
stat_idx = 0          # index into esstats for the current round
round_no = 1          # 1-based round counter
last_bulk_size = 0    # column 6 of the previous data row

# Compared against a csv field, hence a string. The value is slightly above
# 28*1024*1024 (29360128).
top_bulk_size = "29360142"

# NOTE(review): "rb" is the mode the Python 2 csv module expects; Python 3's
# csv module needs a text-mode file -- confirm the target interpreter.
with open(os.path.join(jsondir, "upload.log"), "rb") as log_file:
    reader = csv.reader(log_file, delimiter=",")
    next(reader, None)  # skip the first row (presumably the column header)
    for row in reader:
        ts = int(row[0])
        stamp = datetime.datetime.fromtimestamp(ts)

        round_started = last_ts > 0 and ts - last_ts > 20
        if round_started:
            print("----------------------------------")
            stat_idx += 1
            round_no += 1
            # Shard index cycles every round; the concurrency index advances
            # only when the shard index wraps and the previous row was
            # uploaded at the largest bulk size.
            shard_idx = (shard_idx + 1) % len(shards)
            if shard_idx == 0 and last_bulk_size == top_bulk_size:
                conc_idx = (conc_idx + 1) % len(concurrencies)

        # Append: shard count, concurrency, ES index size, ES doc count, round.
        row.extend([
            shards[shard_idx],
            concurrencies[conc_idx],
            esstats[stat_idx][0],
            esstats[stat_idx][1],
            round_no,
        ])
        print(stamp, row)
        nrows.append(row)

        last_ts = ts
        last_bulk_size = row[6]
# Column names for the enriched log. The last five name the values appended to
# each row (shard, concurrency, ES size, ES doc count, round); the first eight
# presumably mirror the columns of upload.log.
header = ["time","post","posted","line","byte","err","bulksize","bulkline","shard","concurrency","essize","esdoc","round"]

# Written to the current working directory, header first, then every row.
# NOTE(review): "wb" is the mode the Python 2 csv module expects; Python 3's
# csv module needs a text-mode file -- confirm the target interpreter.
with open("upload_enriched.log", "wb") as enriched_file:
    csv.writer(enriched_file).writerows([header] + nrows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment