Skip to content

Instantly share code, notes, and snippets.

@alebaffa
Created February 22, 2018 11:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alebaffa/09f61f79e625ee85b20deb5393132961 to your computer and use it in GitHub Desktop.
Save alebaffa/09f61f79e625ee85b20deb5393132961 to your computer and use it in GitHub Desktop.
elasticsearch client from jupyter
## Import all the needed libraries
import json
import logging.config
import os
import sys

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch import RequestsHttpConnection
from elasticsearch import helpers
## Create connection using proxy
class MyConnection(RequestsHttpConnection):
    """HTTP connection whose session is configured with caller-supplied proxies.

    Accepts one keyword argument on top of those taken by
    ``RequestsHttpConnection``:

    proxies -- mapping of URL scheme to proxy address, for example
               ``{'http': 'proxy_host:proxy_port'}``; defaults to ``{}``.
    """

    def __init__(self, *args, **kwargs):
        # Take ``proxies`` out of kwargs before delegating, so the parent
        # constructor only receives the arguments it was originally given.
        proxies = kwargs.pop('proxies', {})
        super(MyConnection, self).__init__(*args, **kwargs)
        # ``self.session`` is only available once the parent constructor has
        # run (presumably a requests session -- confirm against the library).
        self.session.proxies = proxies
## Create Elasticsearch client
# 'es_host:es_port' and 'proxy_host:proxy_port' are placeholders to replace
# with real addresses. The extra ``proxies`` keyword is presumably handed on
# to ``MyConnection`` by the client -- confirm for your elasticsearch-py version.
es = Elasticsearch(
    'es_host:es_port',
    connection_class=MyConnection,
    proxies={'http': 'proxy_host:proxy_port'},
)
## If you want to print all the indexes in Elasticsearch
# Iterating the response of ``indices.get`` yields one entry per index matched
# by the '*' pattern; each entry is printed on its own line. The loop variable
# is deliberately not called ``index`` so it cannot be confused with the
# ``index`` search setting used by the scan further down.
for index_name in es.indices.get(index='*'):
    print(index_name)
## Change index and doc_type depending on the kind of search you want to do
# Placeholder search target: replace both values with real names.
index = 'your_index'
doc_type = 'your_doc_type'

# Value passed as ``size`` to the scan helper.
# More info: http://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
size = 1000

# The 'match_all' query selects every document of the chosen index/doc_type.
docs = helpers.scan(
    es,
    index=index,
    doc_type=doc_type,
    size=size,
    query={'query': {'match_all': {}}},
)
# Now that you have the data, you can create a cool DataFrame with pandas
# Flatten the scanned documents into one row per child entry: every element of
# doc['parent_field']['child_field'] becomes a row that repeats the parent's
# 'field_1' next to that child's own 'field_2' and 'field_3'.
# NOTE(review): the field names are placeholders. ``helpers.scan`` normally
# yields raw hits whose document body sits under '_source', so 'parent_field'
# presumably stands for '_source' (or needs a doc['_source'] prefix) -- confirm
# against your mapping.
df = pd.DataFrame([
    {
        'field_1': doc['parent_field']['field_1'],
        'field_2': child['field_2'],
        'field_3': child['field_3'],
    }
    for doc in docs
    for child in doc['parent_field']['child_field']
])
# Bare expression on purpose: as the last line of a Jupyter cell it renders
# the DataFrame.
df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment