Skip to content

Instantly share code, notes, and snippets.

@erichannell
Created February 27, 2015 10:59
Show Gist options
  • Save erichannell/e629d96a3f2bbd78bf6c to your computer and use it in GitHub Desktop.
Save erichannell/e629d96a3f2bbd78bf6c to your computer and use it in GitHub Desktop.
benchmarking the Tableau Extract API
from random import randrange, random, choice
import dataextract as tde
import os
import string
import time
import csv
def write_to_extract(rows_to_write, name):
start = time.time()
current_file = name + '.tde'
with tde.Extract(current_file) as extract:
tableDef = tde.TableDefinition()
tableDef.addColumn('rowID', tde.Type.INTEGER)
tableDef.addColumn('tag', tde.Type.CHAR_STRING)
tableDef.addColumn('value', tde.Type.DOUBLE)
table = None
# 'Extract' is a magic term, you just need to use it.
if not extract.hasTable('Extract'):
# Table does not exist, so create it.
print "Writing", rows_to_write, "rows to a tde"
table = extract.addTable('Extract', tableDef)
else:
# Table exists, so append the new data.
print "Appending", rows_to_write, "rows to a tde"
table = extract.openTable('Extract')
new_row = tde.Row(tableDef)
print "Progress:",
end = rows_to_write
progress = 0
for i in xrange(end):
new_row.setInteger(0, i)
new_row.setCharString(1, choice(string.ascii_lowercase))
new_row.setDouble(2, random())
table.insert(new_row)
# this is just a complicated way to show a progress bar
if int((float(i) / end) * 100) % 5 == 0 and progress != int((float(i) / end) * 100):
progress = int((float(i) / end) * 100)
if progress % 10 == 0:
print str(progress)+'%,',
print "done!"
elapsed_time = round(time.time()-start, 1)
return elapsed_time
def main():
rows_to_write = int(raw_input("Create a .tde with how many rows? "))
name = "test"
tde_time = write_to_extract(rows_to_write, name)
tde_size = os.path.getsize(name + ".tde")
print "*" * 60
print "\nSUMMARY:"
print "Time taken to create tde:", tde_time, "seconds"
print "Size of tde file (bytes):", tde_size
print "\r"
# Run the interactive benchmark only when this file is executed as a script,
# so importing the module does not prompt for input or write a file.
if __name__ == "__main__":
    main()
@erichannell
Copy link
Author

This script appends data to an existing .tde file, or creates a new file if one does not exist yet (lines 20-28). There is a great tutorial on the Extract API here: http://www.tableau.com/learn/tutorials/on-demand/extract-api-introduction and you can download the dataextract Python module here: http://www.tableau.com/data-extract-api

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment