# animeshk08/create_schema.py

## create_schema.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import time
import pandas as pd
import argparse
from elasticsearch import Elasticsearch


def create_schema(index, file_name=None):
    """Export the field names of an ElasticSearch index mapping as a CSV schema.

    Creates an ``Elasticsearch`` client with no arguments, reads the
    properties listed under the 'items' mapping of ``index`` and writes one
    CSV row per field with the columns 'name', 'type', 'aggregatable' and
    'description'. Rows are sorted by field name.

    Args:
        index: Name of the index whose mapping is read. It is also used as
            the key to look the mapping up in the response.
        file_name: Path of the CSV file to write. ``None`` falls back to
            'schema.csv', the same default the command-line interface uses.

    Raises:
        KeyError: If the response has no entry for ``index`` or that entry
            has no 'items' properties.
    """
    if file_name is None:
        # DataFrame.to_csv(None) only returns the CSV text and writes
        # nothing, which would make the final message misleading.
        file_name = 'schema.csv'

    es = Elasticsearch()

    # Fetch the mapping and keep the field names in the order returned.
    mapping = es.indices.get_mapping(index=index)
    field_names = list(mapping[index]['mappings']['items']['properties'])

    # print() does not interpolate extra arguments, so format explicitly.
    print("Indexes fetched are %s" % field_names)

    # Fields with hand-written metadata; every other field gets placeholders.
    default_fields = {
        'uuid': ['uuid', 'keyword', 'true', "'Perceval UUID.'"]
    }
    non_default_fields_values = ['keyword', 'true', "'NA'"]

    rows = [
        default_fields[field] if field in default_fields
        else [field] + non_default_fields_values
        for field in field_names
    ]

    df_columns = ['name', 'type', 'aggregatable', 'description']
    df = pd.DataFrame(columns=df_columns, data=rows)

    # sort_values returns a new frame; keep it so the CSV is actually sorted.
    df = df.sort_values('name')

    df.to_csv(file_name, sep=',', index=False)

    print("Schema created in file:", file_name)


if __name__ == '__main__':
    # Remember when the run began so the total duration can be reported.
    began_at = time.time()

    cli = argparse.ArgumentParser(description="Simple parser for getting index and filename")

    # Optional output path; 'schema.csv' is used when the flag is omitted.
    cli.add_argument(
        "-f", "--file",
        default='schema.csv',
        help="Name of file to store the schema. Default value is schema.csv",
    )

    # Required positional argument naming the index to export.
    cli.add_argument("index", help="Index to convert into schema")

    options = cli.parse_args()

    create_schema(index=options.index, file_name=options.file)

    elapsed = time.time() - began_at
    print("\nTime lapsed:", elapsed)
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-

	import time
	import pandas as pd
	import argparse
	from elasticsearch import Elasticsearch


	def create_schema(index, file_name=None):
	    """Export the field names of an ElasticSearch index mapping as a CSV schema.

	    Creates an ``Elasticsearch`` client with no arguments, reads the
	    properties listed under the 'items' mapping of ``index`` and writes one
	    CSV row per field with the columns 'name', 'type', 'aggregatable' and
	    'description'. Rows are sorted by field name.

	    Args:
	        index: Name of the index whose mapping is read. It is also used as
	            the key to look the mapping up in the response.
	        file_name: Path of the CSV file to write. ``None`` falls back to
	            'schema.csv', the same default the command-line interface uses.

	    Raises:
	        KeyError: If the response has no entry for ``index`` or that entry
	            has no 'items' properties.
	    """
	    if file_name is None:
	        # DataFrame.to_csv(None) only returns the CSV text and writes
	        # nothing, which would make the final message misleading.
	        file_name = 'schema.csv'

	    es = Elasticsearch()

	    # Fetch the mapping and keep the field names in the order returned.
	    mapping = es.indices.get_mapping(index=index)
	    field_names = list(mapping[index]['mappings']['items']['properties'])

	    # print() does not interpolate extra arguments, so format explicitly.
	    print("Indexes fetched are %s" % field_names)

	    # Fields with hand-written metadata; every other field gets placeholders.
	    default_fields = {
	        'uuid': ['uuid', 'keyword', 'true', "'Perceval UUID.'"]
	    }
	    non_default_fields_values = ['keyword', 'true', "'NA'"]

	    rows = [
	        default_fields[field] if field in default_fields
	        else [field] + non_default_fields_values
	        for field in field_names
	    ]

	    df_columns = ['name', 'type', 'aggregatable', 'description']
	    df = pd.DataFrame(columns=df_columns, data=rows)

	    # sort_values returns a new frame; keep it so the CSV is actually sorted.
	    df = df.sort_values('name')

	    df.to_csv(file_name, sep=',', index=False)

	    print("Schema created in file:", file_name)


	if __name__ == '__main__':
	    # Record the start time so the total run time can be printed at the end.
	    start_time = time.time()

	    parser = argparse.ArgumentParser(
	        description="Simple parser for getting index and filename"
	    )

	    # Default file name is 'schema.csv'
	    parser.add_argument("-f", "--file", default='schema.csv',
	                        help="Name of file to store the schema. Default value is schema.csv")

	    # Positional index argument
	    parser.add_argument("index",
	                        help="Index to convert into schema")

	    args = parser.parse_args()

	    create_schema(args.index, args.file)

	    print("\nTime lapsed:", time.time() - start_time)