Skip to content

Instantly share code, notes, and snippets.

@morefreeze
Created January 24, 2018 06:56
Show Gist options
  • Save morefreeze/833caeaa66b4f87d75fc28a4bd57ddb2 to your computer and use it in GitHub Desktop.
Save morefreeze/833caeaa66b4f87d75fc28a4bd57ddb2 to your computer and use it in GitHub Desktop.
Run an HQL query through Hive beeline and print the result in CSV format
# coding: utf-8
from __future__ import print_function
import subprocess
import sys
import argparse
def _fill_placeholders(hql, values):
    """Substitute ``$1``, ``$2``, ... in *hql* with entries of *values*.

    The substitution is done in a single pass, so text inserted from
    *values* is never re-scanned for further placeholders.  For a digit
    run such as ``$12`` the longest prefix that names a supplied argument
    wins; the remaining digits are kept as literal text.  Placeholders
    that do not correspond to any supplied argument are left untouched.

    Args:
        hql: The query text containing ``$N`` placeholders.
        values: Sequence of replacement strings; ``$1`` maps to
            ``values[0]``.

    Returns:
        The query text with the placeholders filled in.
    """
    import re  # local import so this block does not depend on file-level edits

    def _replace(match):
        digits = match.group(1)
        # Try "$12" as argument 12 first, then fall back to "$1" + "2".
        for end in range(len(digits), 0, -1):
            index = int(digits[:end])
            if index <= len(values):
                return values[index - 1] + digits[end:]
        return match.group(0)

    # The first digit is 1-9, so "$0" and "$01" are never placeholders.
    return re.sub(r'\$([1-9][0-9]*)', _replace, hql)


def main():
    """Run an HQL query through beeline and print the result as CSV.

    Command line:
        -e/--hql HQL             query text given inline
        -f/--file PATH           read the query from PATH; lines starting
                                 with ``#`` are dropped
        -n/--dry-run             print the final query and exit without
                                 running anything
        -N/--skip-column-names   drop the first (header) line of the output
        argv ...                 values substituted for ``$1``, ``$2``, ...

    Exactly one of ``-e`` / ``-f`` must be given; otherwise argparse
    reports a usage error.  If the beeline command exits with a non-zero
    status, this function exits the interpreter with the same status.
    """
    try:
        from shlex import quote as shell_quote  # Python 3.3+
    except ImportError:
        from pipes import quote as shell_quote  # Python 2

    parser = argparse.ArgumentParser()
    # required=True: without a query there is nothing to run, and the old
    # code went on with hql=None.
    mut_group = parser.add_mutually_exclusive_group(required=True)
    mut_group.add_argument('-e', '--hql', type=str)
    mut_group.add_argument('-f', '--file', type=str)
    parser.add_argument('-n', '--dry-run', action='store_true')
    parser.add_argument('-N', '--skip-column-names', action='store_true')
    parser.add_argument('argv', nargs='*')
    args = parser.parse_args()

    if args.file is not None:
        with open(args.file, 'r') as f:
            hql = ' '.join(line for line in f if not line.startswith('#'))
    else:
        hql = args.hql

    hql = _fill_placeholders(hql, args.argv)

    if args.dry_run:
        print(hql)
        return

    # The command string is interpreted by the shell that `su -c` starts,
    # so the query must be quoted: HQL routinely contains double quotes,
    # backticks and `$`, all of which the shell would otherwise act on.
    beeline = (
        '/home/hadoop/hive/bin/beeline -u jdbc:hive2:// --outputformat=csv2 '
        '--silent=true --showHeader=true -e %s' % shell_quote(hql)
    )
    cmds = ['sudo', 'su', '-c', beeline, '-', 'hadoop']

    # universal_newlines=True makes stdout text on Python 3 as well, so the
    # string handling below works on both major versions.
    process = subprocess.Popen(
        cmds,
        stdout=subprocess.PIPE,
        stderr=sys.stderr,
        universal_newlines=True,
    )
    stdout, _ = process.communicate()

    if stdout:
        if args.skip_column_names:
            # Drop everything up to and including the first newline.
            _, _, stdout = stdout.partition('\n')
        if stdout:
            # Write verbatim instead of print() so no extra blank line is
            # appended after output that already ends with a newline.
            sys.stdout.write(stdout)
            if not stdout.endswith('\n'):
                sys.stdout.write('\n')

    if process.returncode:
        # Propagate failure so callers/pipelines can detect it.
        sys.exit(process.returncode)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment