Created
June 11, 2016 00:32
-
-
Save jlewi/adb6e0d11f7f1b97165a5a8e37ec41ff to your computer and use it in GitHub Desktop.
profile_row_conversion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import logging | |
import json | |
import time | |
from google.protobuf import json_format | |
from tensorflow.core.example import example_pb2 | |
import google.cloud.dataflow as df | |
def convert_row_to_json(row):
    """Serialize one row as a single-line JSON-encoded tensorflow Example.

    Args:
        row: Indexable sequence. ``row[0]`` is stored (stringified) as the
            ``id`` bytes feature, ``row[1]`` .. ``row[270]`` are stored as
            the float features ``Z1`` .. ``Z270`` and ``row[271]`` is stored
            as the int64 ``target`` feature.

    Returns:
        The JSON representation of the Example as a string with the
        newlines removed.

    Raises:
        IndexError: If ``row`` has fewer than 272 entries.
    """
    e = example_pb2.Example()
    features = e.features.feature

    # bytes_list only accepts byte strings. On Python 2 str() already yields
    # bytes; on Python 3 it yields text, which must be encoded first or
    # protobuf raises TypeError.
    row_id = str(row[0])
    if not isinstance(row_id, bytes):
        row_id = row_id.encode("utf-8")
    features['id'].bytes_list.value.append(row_id)
    features['target'].int64_list.value.append(row[271])

    # Add features to predict on.
    for i in range(1, 271):
        features['Z' + str(i)].float_list.value.append(row[i])
    # ...other features ...

    j = json_format.MessageToJson(e)
    # Remove newlines by round-tripping through the json module, whose
    # default dumps() output is a single line.
    return json.dumps(json.loads(j))
if __name__ == "__main__":
    # Micro-benchmark: time convert_row_to_json on a synthetic row and
    # extrapolate the cost of converting 600K records.

    # Synthetic row: a string id followed by the integers 1..271, so that
    # indices 1..270 act as features and index 271 as the target.
    row = ["h"]
    row.extend(range(1, 272))

    # time.clock was removed in Python 3.8; time.process_time is its
    # CPU-time replacement. Fall back to time.clock on interpreters that
    # predate process_time (Python 2).
    timer = getattr(time, "process_time", None) or time.clock

    num = 1000
    total_records = 600000.0
    num_cpus = 3.0

    start = timer()
    for _ in range(num):
        convert_row_to_json(row)
    end = timer()

    batch_time = end - start
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Time (seconds): {0}".format(batch_time))

    # Scale the measured batch up to the full data set.
    all_time = total_records / num * batch_time
    print("Time to process 600K records. {0}".format(all_time))

    # With 3 CPUs, assuming the work divides evenly between them.
    three_cpu = all_time / num_cpus
    print("Time to process 600K records with 3 cpus. {0}".format(three_cpu))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment