Last active
February 27, 2018 17:16
-
-
Save sebastien-collet/3ce891c7ea86f6a091997ef411db8cba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import ibis
import pandas as pd
# ====== Ibis conf (to avoid a bug) ======
# Override the Impala backend's `temp_db` option with a backtick-quoted name.
# NOTE(review): presumably the quoting is needed because the bare identifier
# __ibis_tmp (leading underscores) is rejected by Hive -- confirm.
with ibis.config.config_prefix('impala'):
    ibis.config.set_option('temp_db', '`__ibis_tmp`')
# ====== Connection ======
# Connect to Hive through ibis' Impala client: pass the Hive host and port
# (10000 by default) together with a WebHDFS client.
# Hosts and credentials are read from environment variables; a missing
# variable raises KeyError.
hdfs = ibis.hdfs_connect(host=os.environ['IP_HDFS'], port=50070)
client = ibis.impala.connect(
    host=os.environ['IP_HIVE'],
    port=10000,
    hdfs_client=hdfs,
    user=os.environ['USER'],
    password=os.environ['PASSWORD'],
    auth_mechanism='PLAIN',
)
# ====== Writing table ======
# Build a small pandas DataFrame with two columns, 'hello' and 'world'.
liste_hello = ['hello1', 'hello2']
liste_world = ['world1', 'world2']
df = pd.DataFrame(data={'hello': liste_hello, 'world': liste_world})

# Write the DataFrame to Hive only if no table with that name exists yet.
db = client.database('default')
if not client.exists_table('helloworld'):
    db.create_table('helloworld', df)

# Kept outside the `if` so that `t` is bound whether or not the table was
# created on this run. The result of execute() is not stored.
t = db['helloworld']
t.execute()
# ====== Reading table ======
# Select data with a SQL query.
# limit=None fetches the whole table; otherwise only the first 10000 rows
# are returned.
requete = client.sql('select * from helloworld')
# Rebinds `df` to the query result.
df = requete.execute(limit=None)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment