Skip to content

Instantly share code, notes, and snippets.

@jlewi
Created June 11, 2016 00:32
Show Gist options
  • Save jlewi/adb6e0d11f7f1b97165a5a8e37ec41ff to your computer and use it in GitHub Desktop.
Save jlewi/adb6e0d11f7f1b97165a5a8e37ec41ff to your computer and use it in GitHub Desktop.
profile_row_conversion
import argparse
import logging
import json
import time
from google.protobuf import json_format
from tensorflow.core.example import example_pb2
import google.cloud.dataflow as df
def convert_row_to_json(row, num_features=270):
    """Convert one input row into a single-line JSON-encoded Example proto.

    Args:
      row: Indexable sequence. row[0] is the record id,
        row[1] .. row[num_features] are the feature values and
        row[num_features + 1] is the target.
      num_features: Number of feature columns that follow the id. The
        default of 270 reproduces the original hard-coded layout
        (features at indices 1..270, target at index 271).

    Returns:
      The Example message rendered as JSON without embedded newlines.
    """
    e = example_pb2.Example()
    features = e.features.feature

    # bytes_list holds byte strings. On Python 2 str() already yields bytes;
    # on Python 3 it yields text, so encode anything that is not bytes yet.
    record_id = row[0]
    if not isinstance(record_id, bytes):
        record_id = str(record_id).encode('utf-8')
    features['id'].bytes_list.value.append(record_id)

    features['target'].int64_list.value.append(row[num_features + 1])

    # Add features to predict on.
    for i in range(1, num_features + 1):
        features['Z' + str(i)].float_list.value.append(row[i])
    # ...other features ...

    j = json_format.MessageToJson(e)
    # Remove newlines: round-trip through json so the output is one line.
    flat = json.dumps(json.loads(j))
    return flat
if __name__ == "__main__":
    # Synthetic row: a string id followed by the integers 1..271, which covers
    # every index that convert_row_to_json reads by default.
    row = ["h"] + list(range(1, 272))

    # time.clock() was removed in Python 3.8. Use time.process_time() when it
    # exists and fall back to time.clock() on older interpreters.
    timer = time.process_time if hasattr(time, "process_time") else time.clock

    num = 1000
    start = timer()
    for _ in range(num):
        convert_row_to_json(row)
    end = timer()
    batch_time = end - start
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Time (seconds): {0}".format(batch_time))

    # Linearly extrapolate the measured batch to 600K records.
    all_time = 600000.0 / num * batch_time
    print("Time to process 600K records. {0}".format(all_time))

    # With 3 CPUs
    three_cpu = all_time / 3.0
    print("Time to process 600K records with 3 cpus. {0}".format(three_cpu))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment