Created
April 10, 2013 15:39
-
-
Save philz/5355729 to your computer and use it in GitHub Desktop.
Query Impala directly from Python. Note that it doesn't do things like query cancellation, which impala-shell does.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Apache License
#
# $ python impala.py 'select "hello", "there"'
# ['hello\tthere']
import os
import sys
import time
# Passed as the third argument of every service.fetch() call in query();
# presumably the maximum number of rows requested per round trip -- confirm
# against the Beeswax fetch() definition.
BATCH = 1000
def query(query, host='localhost', port=21000):
    """Run a query against an Impala daemon and return all result rows.

    Opens a buffered Thrift socket to ``host:port``, pings the service,
    submits ``query``, polls until the query reaches the FINISHED state and
    then fetches the results in batches of ``BATCH``.

    The Thrift/Impala modules are imported inside the function because they
    only become importable after ``adjust_sys_path()`` has extended
    ``sys.path``.

    Args:
        query: Query text to submit.
        host: Hostname of the Impala daemon.
        port: Port of the Impala daemon's service.

    Returns:
        A list with the concatenated ``data`` of every fetched batch (the
        usage example in the file header shows tab-separated row strings).

    Raises:
        Exception: If the query enters the EXCEPTION state.
    """
    from beeswaxd import BeeswaxService
    from beeswaxd.BeeswaxService import QueryState
    from ImpalaService import ImpalaService
    from thrift.transport.TSocket import TSocket
    from thrift.transport.TTransport import TBufferedTransport
    from thrift.protocol import TBinaryProtocol

    transport = TBufferedTransport(TSocket(host, port))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        service = ImpalaService.Client(protocol)
        service.PingImpalaService()

        bq = BeeswaxService.Query()
        bq.query = query
        bq.options = []
        handle = service.query(bq)

        # Poll until the query reaches a terminal state.
        while True:
            state = service.get_state(handle)
            if state == QueryState.FINISHED:
                break
            if state == QueryState.EXCEPTION:
                raise Exception("Impala query failed: %r" % (query,))
            time.sleep(0.5)

        # Drain the result set one batch at a time.
        rows = []
        while True:
            results = service.fetch(handle, False, BATCH)
            rows.extend(results.data)
            if not results.has_more:
                return rows
    finally:
        # Always release the socket, whether the query succeeded or failed.
        transport.close()
def adjust_sys_path():
    """Append impala-shell's bundled Python directories to ``sys.path``.

    Resolves ``/usr/bin/impala-shell`` (following symlinks), strips the last
    two path components, and appends ``lib/impala-shell/gen-py`` and
    ``lib/impala-shell/lib`` under the remaining prefix to ``sys.path``.
    """
    shell_path = os.path.realpath("/usr/bin/impala-shell")
    # Drop the executable name and its directory, then descend into the
    # shell's library directory.
    prefix = shell_path.split("/")[:-2] + ["lib", "impala-shell"]
    for leaf in ("gen-py", "lib"):
        sys.path.append(os.path.join("/", *(prefix + [leaf])))
# Script entry point: run the query given as the first command-line argument
# against the default host/port and print the returned list of rows.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s QUERY" % sys.argv[0])
    adjust_sys_path()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(query(sys.argv[1]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment