Created
April 11, 2020 08:49
-
-
Save animeshk08/0a8dafa66826137032efb6c771074d1d to your computer and use it in GitHub Desktop.
Fetches the field mapping of an Elasticsearch index and creates a CSV schema from it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
import time | |
import pandas as pd | |
import argparse | |
from elasticsearch import Elasticsearch | |
def create_schema(index, file_name=None):
    """Create a CSV schema from the field mapping of an Elasticsearch index.

    An ``Elasticsearch`` client is created with no arguments, the mapping of
    ``index`` is fetched, and one row per mapped field is written with the
    columns ``name``, ``type``, ``aggregatable`` and ``description``. Rows
    are sorted by ``name``.

    :param index: name of the index whose mapping is read
    :param file_name: path of the CSV file to write; it is handed unchanged
        to ``DataFrame.to_csv``. NOTE(review): with the default ``None``
        pandas returns the CSV text instead of writing a file, and that
        text is discarded here - callers should pass a real path.
    """
    es = Elasticsearch()

    # fetch the mapping of the index; the keyword form is assumed to be
    # accepted by both older and newer elasticsearch-py clients
    mapping = es.indices.get_mapping(index=index)
    # NOTE(review): assumes the mapping is nested under a document type
    # called 'items' - confirm against the target cluster
    properties = mapping[index]['mappings']['items']['properties']

    print("Indexes fetched are %s" % list(properties))

    # fields with a hand-written schema row
    default_fields = {
        'uuid': ['uuid', 'keyword', 'true', "'Perceval UUID.'"]
    }
    # type, aggregatable and description used for every other field
    non_default_fields_values = ['keyword', 'true', "'NA'"]

    rows = [
        default_fields.get(field, [field] + non_default_fields_values)
        for field in properties
    ]

    df_columns = ['name', 'type', 'aggregatable', 'description']

    # convert the rows to a dataframe and sort based on 'name';
    # sort_values returns a new frame, so the result has to be kept
    df = pd.DataFrame(columns=df_columns, data=rows)
    df = df.sort_values('name')

    # convert the dataframe to a csv
    df.to_csv(file_name, sep=',', index=False)
    print("Schema created in file:", file_name)
if __name__ == '__main__':
    # remember when the run began so the elapsed time can be reported
    began = time.time()

    cli = argparse.ArgumentParser(description="Simple parser for getting index and filename")

    # optional output file, falls back to 'schema.csv'
    cli.add_argument(
        "-f", "--file",
        default='schema.csv',
        help="Name of file to store the schema. Default value is schema.csv",
    )
    # mandatory positional argument: the index to read
    cli.add_argument("index", help="Index to convert into schema")

    options = cli.parse_args()
    create_schema(options.index, options.file)

    elapsed = time.time() - began
    print("\nTime lapsed:", elapsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment