Skip to content

Instantly share code, notes, and snippets.

@animeshk08
Created April 11, 2020 08:49
Show Gist options
  • Save animeshk08/0a8dafa66826137032efb6c771074d1d to your computer and use it in GitHub Desktop.
Save animeshk08/0a8dafa66826137032efb6c771074d1d to your computer and use it in GitHub Desktop.
Fetches the field mappings of an Elasticsearch index and writes them to a CSV schema file.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
import pandas as pd
import argparse
from elasticsearch import Elasticsearch
def create_schema(index, file_name=None, es=None):
    """Write a CSV schema describing the fields of an Elasticsearch index.

    The mapping of ``index`` is fetched and one row is produced per mapped
    field with the columns ``name``, ``type``, ``aggregatable`` and
    ``description``. Rows are sorted by field name before being written.

    Args:
        index: Name of the index whose mapping is read. The mapping response
            is expected to be keyed by this exact name.
        file_name: Path of the CSV file to write. ``None`` falls back to
            ``'schema.csv'`` so a file is always produced.
        es: Optional client object exposing ``indices.get_mapping``. When
            omitted, a default ``Elasticsearch()`` client is created.

    Raises:
        KeyError: If the mapping response has no entry for ``index`` or no
            ``properties`` section.
    """
    if file_name is None:
        # to_csv(None) would only return a string and never touch the disk.
        file_name = 'schema.csv'
    if es is None:
        es = Elasticsearch()

    # Keyword form is accepted by both old and new client releases.
    mappings = es.indices.get_mapping(index=index)[index]['mappings']
    if 'items' in mappings:
        # Typed mapping: fields live under the 'items' document type.
        properties = mappings['items']['properties']
    else:
        # Typeless mapping: fields live directly under 'mappings'.
        properties = mappings['properties']

    print("Indexes fetched are %s" % list(properties))

    # Fields with a curated row; every other field gets placeholder values.
    default_fields = {
        'uuid': ['uuid', 'keyword', 'true', "'Perceval UUID.'"]
    }
    non_default_fields_values = ['keyword', 'true', "'NA'"]
    rows = [
        default_fields.get(field, [field] + non_default_fields_values)
        for field in properties
    ]

    df_columns = ['name', 'type', 'aggregatable', 'description']
    df = pd.DataFrame(columns=df_columns, data=rows)
    # sort_values returns a new frame; keep it so the output is ordered.
    df = df.sort_values('name')

    df.to_csv(file_name, sep=',', index=False)
    print("Schema created in file:", file_name)
if __name__ == '__main__':
    # Note the launch time so the total run duration can be reported.
    began_at = time.time()

    cli = argparse.ArgumentParser(
        description="Simple parser for getting index and filename",
    )
    # Optional output path; 'schema.csv' is used when the flag is absent.
    cli.add_argument(
        "-f", "--file",
        default='schema.csv',
        help="Name of file to store the schema. Default value is schema.csv",
    )
    # Required positional argument naming the index to read.
    cli.add_argument("index", help="Index to convert into schema")
    options = cli.parse_args()

    create_schema(options.index, options.file)

    elapsed = time.time() - began_at
    print("\nTime lapsed:", elapsed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment