Skip to content

Instantly share code, notes, and snippets.

@rail44
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rail44/37ea52adfbfeeb255e8b to your computer and use it in GitHub Desktop.
Save rail44/37ea52adfbfeeb255e8b to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# coding: utf-8
class LtsvConverter:
"""The class to create parsed view of table that has single column with LTSV.
This excepts that all lines have same order of fields.
client: client of BigQuery-Python
"""
def __init__(self, client, dataset='', table='', field='f0'):
self.client = client
self.project = client.project_id
self.dataset = dataset
self.table = table
self.field = field
def convert(self):
query = self._get_query()
body = {
'tableReference': {
'tableId': '{}_parsed'.format(self.table),
'projectId': self.project,
'datasetId': self.dataset
},
"view": {
"query": query
}
}
self.client.bigquery.tables().insert(body=body, datasetId=self.dataset, projectId=self.project).execute()
def _get_line(self):
job_id, _results = self.client.query('SELECT {} FROM [{}.{}] LIMIT 1'.format(self.field, self.dataset, self.table))
complete, row_count = self.client.check_job(job_id)
results = self.client.get_query_rows(job_id)
return results[0][self.field]
def _get_keys(self):
line = self._get_line()
return [kv.split(':')[0] for kv in [f for f in line.split("\t")]]
def _get_query(self):
keys = self._get_keys()
selects = ', '.join(['NTH(2, SPLIT({}, ":")) {}'.format(k, k) for k in keys[1:]])
fields = ',\n'.join(['LEAD(fields, {}) OVER (PARTITION BY f0 ORDER BY pos) {}'.format(i+1, k) for i, k in enumerate(keys[1:])])
return '''
SELECT NTH(2, SPLIT(fields, ":")) {}, {} FROM (
SELECT
fields,
pos,
{}
FROM FLATTEN ((SELECT f0, fields, POSITION(fields) pos FROM (
SELECT SPLIT({}, '\\t') fields, {} f0 FROM [{}.{}])
), fields) GROUP BY f0, fields, pos
) where pos=1;
'''.format(keys[0], selects, fields, self.field, self.field, self.dataset, self.table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment