Skip to content

Instantly share code, notes, and snippets.

@jlewi
Created June 11, 2016 00:33
Show Gist options
  • Save jlewi/a41e233310c072a0ae93930533f64225 to your computer and use it in GitHub Desktop.
Save jlewi/a41e233310c072a0ae93930533f64225 to your computer and use it in GitHub Desktop.
Cache the example proto
import argparse
import logging
import json
import time
from google.protobuf import json_format
from tensorflow.core.example import example_pb2
import google.cloud.dataflow as df
# Build the Example proto once at import time. convert_row_to_json overwrites
# the single value stored in each feature instead of constructing a new
# message for every row.
e = example_pb2.Example()
# Use a bytes literal: protobuf bytes fields reject text strings on Python 3,
# while b'' is the same object type as '' on Python 2.
e.features.feature['id'].bytes_list.value.append(b'')
e.features.feature['target'].int64_list.value.append(0)
# Pre-populate one placeholder value for each of the features Z1..Z270.
for i in range(1, 271):
    e.features.feature['Z' + str(i)].float_list.value.append(0)
def _to_bytes(value):
    """Return *value* as a byte string on both Python 2 and Python 3."""
    if isinstance(value, bytes):
        return value
    text = str(value)
    # On Python 2 str() already yields bytes; on Python 3 it yields text
    # that must be encoded before it can be stored in a bytes field.
    return text if isinstance(text, bytes) else text.encode('utf-8')


def convert_row_to_json(row):
    """Serialize one row as a single-line JSON rendering of an Example proto.

    The module-level proto ``e`` is reused: its existing values are
    overwritten in place, so every call mutates that shared object.

    Args:
        row: Indexable record where ``row[0]`` is the id, ``row[1]`` through
            ``row[270]`` are the values for features ``Z1``..``Z270`` and
            ``row[271]`` is the target.

    Returns:
        The JSON string for the populated proto, without newlines.
    """
    features = e.features.feature
    features['id'].bytes_list.value[0] = _to_bytes(row[0])
    features['target'].int64_list.value[0] = row[271]
    # Add features to predict on.
    for i in range(1, 271):
        features['Z' + str(i)].float_list.value[0] = row[i]
    # ...other features ...
    pretty = json_format.MessageToJson(e)
    # Remove newlines: MessageToJson pretty-prints, so re-encode compactly.
    return json.dumps(json.loads(pretty))
if __name__ == "__main__":
    # Micro-benchmark: time `num` conversions of one synthetic row and
    # extrapolate the cost of converting 600K records.

    # time.clock() was removed in Python 3.8; prefer time.process_time()
    # where it exists and fall back to time.clock() on Python 2.
    try:
        timer = time.process_time
    except AttributeError:
        timer = time.clock

    # Synthetic row: an id followed by the integers 1..271, which covers
    # every index convert_row_to_json reads (0 through 271).
    row = ["h"] + list(range(1, 272))

    num = 1000
    start = timer()
    for _ in range(num):
        convert_row_to_json(row)
    batch_time = timer() - start
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Time (seconds): {0}".format(batch_time))

    all_time = 600000.0 / num * batch_time
    print("Time to process 600K records. {0}".format(all_time))

    # With 3 CPUs
    three_cpu = all_time / 3.0
    print("Time to process 600K records with 3 cpus. {0}".format(three_cpu))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment