Skip to content

Instantly share code, notes, and snippets.

@vdenotaris
Last active October 5, 2018 09:33
Show Gist options
  • Save vdenotaris/22b9abfb44ea21cefda0a47c0197cb2a to your computer and use it in GitHub Desktop.
Save vdenotaris/22b9abfb44ea21cefda0a47c0197cb2a to your computer and use it in GitHub Desktop.
StarSchemaBenchmark tbl2csv converter
# Author: Vincenzo De Notaris (vdenotaris@google.com)
# aka a not so brilliant Python programmer!
# Reference: https://github.com/lemire/StarSchemaBenchmark
# Libs
import sys
import os
# Parsing function
def parseAndWrite(name, header, base_dir=None):
    """Convert ``<name>.tbl`` into ``<name>.csv`` with a header line.

    Every input row is rewritten as follows: the line terminator and a
    single trailing ``|`` (if present) are dropped, each literal comma is
    replaced by ``N``, and each remaining ``|`` becomes a comma. The header
    is written first, and every output row ends with ``"\\n"``.

    Args:
        name: Base file name of the table, without extension.
        header: Text written verbatim as the first line of the CSV file.
        base_dir: Directory that holds the ``.tbl`` file and receives the
            ``.csv`` file. When omitted, the module-level ``path`` is used.

    Raises:
        IOError: (``OSError`` on Python 3) if the input file cannot be read
            or the output file cannot be written.
    """
    if base_dir is None:
        # Resolved at call time so the script may define ``path`` after
        # this function.
        base_dir = path
    source = os.path.join(base_dir, name + ".tbl")
    target = os.path.join(base_dir, name + ".csv")

    # The input is opened first so that a missing .tbl file does not leave
    # a header-only .csv behind; ``with`` closes both files on any error.
    with open(source, "r") as tbl, open(target, "w") as out:
        out.write(header + "\n")
        # Stream row by row instead of loading the whole table in memory.
        for row in tbl:
            row = row.rstrip("\r\n")
            # Drop only a real trailing delimiter; never eat a data char
            # when the row has none or the last line lacks a newline.
            if row.endswith("|"):
                row = row[:-1]
            # Commas must be neutralised before pipes are turned into
            # commas (no CSV quoting is applied) - order matters here.
            row = row.replace(",", "N").replace("|", ",")
            out.write(row + "\n")
    print("- " + name + " [Done]")
# Let's do this dirty job
# Directory containing this script, kept with a trailing "/" so it can be
# used directly as a prefix for the .tbl / .csv file names.
path = os.path.dirname(os.path.abspath(__file__)) + "/"

# (table name, CSV header line) pairs, one per table to convert.
ssb = [
    ("customer", "c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment"),
    ("date", "d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl"),
    ("lineorder", "lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordertotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode"),
    ("part", "p_partkey,p_name,p_mfgr,p_category,p_brand1,p_color,p_type,p_size,p_container"),
    ("supplier", "s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone"),
]

# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3, unlike the print statement, which is a syntax error on Python 3.
print("----------------------")
print("StarSchemaBenchmark tbl2csv converter\nSee: https://github.com/lemire/StarSchemaBenchmark")
print("----------------------")
print("Converting...")
# Convert every table in turn; each call reports its own completion.
for name, header in ssb:
    parseAndWrite(name, header)
print("That's all folks!")
print("----------------------")
# Have fun!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment