Created
October 7, 2016 12:45
-
-
Save sebastien-collet/80ef622c468d4323cec58f1ced8d8af8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Round-trip a small pandas DataFrame through Impala using ibis.

The script connects to Impala (with a WebHDFS client), writes a two-column
DataFrame to the ``helloworld`` table if that table does not exist yet, and
reads the table back into a DataFrame with a SQL query.

The hosts are read from the ``IP_HDFS`` and ``IP_IMPALA`` environment
variables; a missing variable raises ``KeyError`` at connection time.
"""
import os

import ibis
import pandas as pd

# ====== Connection ======
# Connect to Impala by providing the Impala host IP and port (21050 by
# default) together with a WebHDFS client.
hdfs = ibis.hdfs_connect(host=os.environ['IP_HDFS'], port=50070)
client = ibis.impala.connect(
    host=os.environ['IP_IMPALA'],
    port=21050,
    hdfs_client=hdfs,
)

# ====== Writing table ======
# Build a simple pandas DataFrame with two columns.
liste_hello = ['hello1', 'hello2']
liste_world = ['world1', 'world2']
df = pd.DataFrame(data={'hello': liste_hello, 'world': liste_world})

# Write the DataFrame to Impala only if the table name doesn't exist yet.
db = client.database('default')
if not client.exists_table('helloworld'):
    db.create_table('helloworld', df)

# Fetch a handle on the table and run it once; the result is discarded here.
# NOTE(review): the original indentation was lost, so it is unclear whether
# these two statements were meant to run only after creation -- confirm.
t = db['helloworld']
t.execute()

# ====== Reading table ======
# Select data with a SQL query.
# limit=None fetches the whole table; otherwise only the first 10000 rows
# are returned.
requete = client.sql('select * from helloworld')
df = requete.execute(limit=None)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment