Skip to content

Instantly share code, notes, and snippets.

@sebastien-collet
Created October 7, 2016 12:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sebastien-collet/80ef622c468d4323cec58f1ced8d8af8 to your computer and use it in GitHub Desktop.
Save sebastien-collet/80ef622c468d4323cec58f1ced8d8af8 to your computer and use it in GitHub Desktop.
import ibis
import pandas as pd
import os
# ====== Connection ======
# Connect to Impala by providing the Impala host IP and port (21050 here)
# together with a WebHDFS client. Host addresses are read from the
# IP_HDFS and IP_IMPALA environment variables; a missing variable raises
# KeyError.
hdfs = ibis.hdfs_connect(host=os.environ['IP_HDFS'], port=50070)
client = ibis.impala.connect(host=os.environ['IP_IMPALA'], port=21050, hdfs_client=hdfs)

# ====== Writing table ======
# Build a simple pandas DataFrame with two string columns.
liste_hello = ['hello1', 'hello2']
liste_world = ['world1', 'world2']
df = pd.DataFrame(data={'hello': liste_hello, 'world': liste_world})

# Write the DataFrame to Impala only if the table does not exist yet.
db = client.database('default')
if not client.exists_table('helloworld'):
    db.create_table('helloworld', df)

# Look the table up and run it once; the fetched result is discarded here.
t = db['helloworld']
t.execute()

# ====== Reading table ======
# Select data with a raw SQL query.
# limit=None fetches the whole table; otherwise only the first 10000 rows
# are returned.
requete = client.sql('select * from helloworld')
df = requete.execute(limit=None)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment