Created
June 11, 2016 00:33
-
-
Save jlewi/a41e233310c072a0ae93930533f64225 to your computer and use it in GitHub Desktop.
Cache the example proto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import logging | |
import json | |
import time | |
from google.protobuf import json_format | |
from tensorflow.core.example import example_pb2 | |
import google.cloud.dataflow as df | |
# Build one Example proto at import time. Every feature gets a single
# placeholder value so that convert_row_to_json can overwrite slot [0] in
# place rather than constructing a new message for each row.
e = example_pb2.Example()
e.features.feature['id'].bytes_list.value.append('')
e.features.feature['target'].int64_list.value.append(0)
# Float features are named Z1 .. Z270.
for i in range(1, 271):
    e.features.feature['Z%d' % i].float_list.value.append(0)
def convert_row_to_json(row):
    """Serialize one input row as a JSON-encoded Example.

    The module-level proto ``e`` is reused: its existing value slots are
    overwritten in place, so the message still holds this row's values after
    the call returns.

    Args:
        row: Indexable sequence where ``row[0]`` is the id (stored as
            ``str(row[0])``), ``row[1]`` .. ``row[270]`` are the values for
            features ``Z1`` .. ``Z270``, and ``row[271]`` is the target.

    Returns:
        The JSON text for the populated proto, re-encoded through ``json``.
    """
    # Only slots of the shared proto are mutated; the name `e` is never
    # rebound, so no `global` declaration is required here.
    feature_map = e.features.feature
    feature_map['id'].bytes_list.value[0] = str(row[0])
    feature_map['target'].int64_list.value[0] = row[271]
    # Add features to predict on.
    for idx in range(1, 271):
        feature_map['Z' + str(idx)].float_list.value[0] = row[idx]
    # ...other features ...
    pretty = json_format.MessageToJson(e)
    # Round-trip through json to remove the newlines from the proto's output.
    return json.dumps(json.loads(pretty))
if __name__ == "__main__":
    # Micro-benchmark: time `num` conversions of a synthetic row and
    # extrapolate linearly to 600K records.

    # Synthetic row: a string id followed by the integers 1..271, which
    # fills indices 1..270 (features) and 271 (target).
    row = ["h"] + list(range(1, 272))

    # time.clock() was removed in Python 3.8. Prefer time.process_time()
    # where it exists and fall back to time.clock() on interpreters that
    # predate it, so the script runs on both.
    timer = getattr(time, "process_time", None) or time.clock

    num = 1000
    start = timer()
    for _ in range(num):
        convert_row_to_json(row)
    end = timer()

    batch_time = end - start
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Time (seconds): {0}".format(batch_time))
    all_time = 600000.0 / num * batch_time
    print("Time to process 600K records. {0}".format(all_time))
    # With 3 CPUs
    three_cpu = all_time / 3.0
    print("Time to process 600K records with 3 cpus. {0}".format(three_cpu))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment