Last active
August 29, 2015 14:07
-
-
Save pmanvi/b34a1d69b21326b50878 to your computer and use it in GitHub Desktop.
Searching Enron emails with Elasticsearch (ES)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Query the "enron-email" Elasticsearch index: print the total document
# count, then the most frequent values of the "from" and "to" fields as
# reported by two terms aggregations.
__author__ = 'pmanvi'

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q

# Describe the node with a single dict. Splitting host and port across two
# dicts (as before) declares two separate nodes, each falling back to client
# defaults for the key it lacks.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])

# Total number of documents in the index.
count = Search(using=es).index("enron-email").count()
print(count)

# Match every document; to restrict the search to specific messages, replace
# the query with e.g. .query("match", message_body="test").
s = Search(using=es).index("enron-email").query("match_all")

# One terms aggregation per address field. No explicit size is passed, so the
# number of buckets returned is the server-side default (the headings printed
# below assume that default is 10 -- confirm against the ES version in use).
s.aggs.bucket('from_tags', 'terms', field='from')
s.aggs.bucket('to_tags', 'terms', field='to')

response = s.execute()
print(response)

print("\n=========== Top 10 from mails ===========\n")
for b in response.aggregations.from_tags.buckets:
    # format() instead of "+" so a non-string bucket key cannot raise TypeError.
    print("{} , {}".format(b["key"], b["doc_count"]))
print("=============================================")

print("\n=========== Top 10 to mails ===========\n")
for b in response.aggregations.to_tags.buckets:
    print("{} , {}".format(b["key"], b["doc_count"]))
print("=============================================")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment