Created
February 22, 2018 11:56
-
-
Save alebaffa/09f61f79e625ee85b20deb5393132961 to your computer and use it in GitHub Desktop.
elasticsearch client from jupyter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Import all the needed libraries
import json
import logging.config
import os
import sys

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch import RequestsHttpConnection
from elasticsearch import helpers
## Create connection using proxy
class MyConnection(RequestsHttpConnection):
    """Elasticsearch connection class that applies proxy settings to its session.

    Accepts one extra keyword argument, ``proxies``, in addition to whatever
    ``RequestsHttpConnection`` takes. The value is stored on
    ``self.session.proxies``; it defaults to an empty dict when not supplied.
    NOTE(review): presumably a requests-style mapping such as
    ``{'http': 'host:port'}`` -- confirm against the requests documentation.
    """

    def __init__(self, *args, **kwargs):
        # Pull 'proxies' out first so it is not forwarded to the parent constructor.
        proxy_settings = kwargs.pop('proxies', {})
        super(MyConnection, self).__init__(*args, **kwargs)
        # Set after the parent constructor has run, once self.session is available.
        self.session.proxies = proxy_settings
## Create Elasticsearch client
# 'es_host:es_port' and 'proxy_host:proxy_port' are placeholders: replace them
# with the real Elasticsearch and proxy addresses before running.
proxy_config = {'http': 'proxy_host:proxy_port'}
es = Elasticsearch(
    'es_host:es_port',
    connection_class=MyConnection,
    proxies=proxy_config,
)

## If you want to print all the indexes in Elasticsearch
for index_name in es.indices.get('*'):
    print(index_name)
## Change index and doc_type depending on the kind of search you want to do
index = 'your_index'
doc_type = 'your_doc_type'
size = 1000  # More info: http://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan

# A match_all query selects every document in the chosen index / doc_type.
match_all_query = {'query': {'match_all': {}}}
docs = helpers.scan(
    es,
    query=match_all_query,
    index=index,
    doc_type=doc_type,
    size=size,
)
# Now that you have the data, you can create a cool DataFrame with pandas
# Each item yielded by helpers.scan is a complete search hit; the indexed
# document is stored under the hit's '_source' key, so the fields are read
# from there rather than from the hit itself.
# One row is emitted per entry of parent_field['child_field'], with the
# parent's 'field_1' repeated on every row.
rows = []
for doc in docs:
    parent = doc['_source']['parent_field']
    for child_field in parent['child_field']:
        rows.append({
            'field_1': parent['field_1'],
            'field_2': child_field['field_2'],
            'field_3': child_field['field_3'],
        })
df = pd.DataFrame(rows)
df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment