SohaibAnwaar/Get Similar Skus

## Get Similar Skus
#!/usr/bin/env python
# coding: utf-8

# Here's a small Python task for you to solve. These are the instructions for writing your code. Feel free to use Google, but please be prepared to discuss the solution with me:
#
# When your code is run, it should read the input file my-test-data.json
# and then ask the user to enter a SKU code in order to find its most similar SKUs.
#
# It will check for irregularities in the format of the inserted sku code and throw an exception if one was found.
#
# Then, it creates a matching score with all skus depending on digit importance
# and prints out the 10 most similar skus.
#
# my-test-data.json input file is attached with this email.
#
# Good luck,
# Nischal
#
# # Example
#
# The following will be the output if sku-123 is inserted as an input.
#
# sku-123: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-1"} is more similar to
# sku-12507: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-12", "att-f": "att-f-5", "att-g": "att-g-6", "att-h": "att-h-8", "att-i": "att-i-15", "att-j": "att-j-8"} than to
# sku-11992: {"att-a": "att-a-11", "att-b": "att-b-5", "att-c": "att-c-4", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-13", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-4"} than to
# sku-954: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-13", "att-e": "att-e-1", "att-f": "att-f-10", "att-g": "att-g-3", "att-h": "att-h-15", "att-i": "att-i-6", "att-j": "att-j-3"} than to
# sku-3613: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-1", "att-e": "att-e-13", "att-f": "att-f-5", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-10", "att-j": "att-j-8"} than to
# sku-4196: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-5", "att-d": "att-d-13", "att-e": "att-e-7", "att-f": "att-f-9", "att-g": "att-g-1", "att-h": "att-h-11", "att-i": "att-i-13", "att-j": "att-j-4"} than to
# sku-7956: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-12", "att-e": "att-e-7", "att-f": "att-f-7", "att-g": "att-g-1", "att-h": "att-h-6", "att-i": "att-i-1", "att-j": "att-j-13"} than to
# sku-13091: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-10", "att-d": "att-d-14", "att-e": "att-e-7", "att-f": "att-f-1", "att-g": "att-g-12", "att-h": "att-h-6", "att-i": "att-i-12", "att-j": "att-j-1"} than to
# sku-9697: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-3", "att-d": "att-d-4", "att-e": "att-e-3", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-12", "att-i": "att-i-6", "att-j": "att-j-2"} than to
# sku-11470: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-15", "att-d": "att-d-3", "att-e": "att-e-2", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-11", "att-j": "att-j-13"} than to
# sku-1023: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-13", "att-e": "att-e-8", "att-f": "att-f-10", "att-g": "att-g-12", "att-h": "att-h-2", "att-i": "att-i-8", "att-j": "att-j-1"}


import json

# Absolute path under one user's home directory; adjust it when running elsewhere.
filename = '/Users/sohaibanwar/Downloads/my-test-data.json'
# JSON text is UTF-8 by specification, so name the encoding explicitly instead
# of relying on the platform's locale-dependent default.
with open(filename, 'r', encoding='utf-8') as f:
    # presumably a mapping of SKU code -> attribute mapping; verify against the file
    data = json.load(f)


from itertools import islice

# SKU code whose closest matches the script looks up.
sku_input = "sku-123"

def take(n, iterable):
    """Collect at most the first ``n`` elements of ``iterable`` into a list.

    Fewer than ``n`` elements come back when the iterable runs out early.
    """
    head = islice(iterable, n)
    return [element for element in head]

def get_matched_indexes(record1, record2):
    """Return the positions of ``record1`` keys whose values ``record2`` shares.

    Positions count the keys of ``record1`` in iteration order, starting at 0.
    A key qualifies only when ``record2`` also contains it with an equal value.
    """
    matched = []
    for position, key in enumerate(record1):
        if key not in record2:
            continue
        if record1[key] == record2[key]:
            matched.append(position)
    return matched


def check_key_val(sku_input, data, top_n=10):
    """Return the SKU codes in ``data`` most similar to ``sku_input``.

    Every record in ``data`` (the reference record included) is compared with
    the reference record via ``get_matched_indexes``. Records sharing no
    attribute value with the reference are dropped; the others are ordered by

    1. the position of their first matching attribute, lowest first,
    2. the number of matching attributes, highest first,
    3. the list of matching positions, compared element by element.

    Records that are still tied keep the iteration order of ``data``.

    NOTE(review): the sample output in the task description ranks sku-11992
    (five matches, the first one at att-d) above SKUs with four matches
    starting at att-a, which this ordering does not produce -- confirm
    whether the match count is meant to outrank the position.

    Args:
        sku_input: Key of the reference record in ``data``.
        data: Mapping from SKU code to that SKU's attribute mapping.
        top_n: Maximum number of SKU codes to return.

    Returns:
        List of at most ``top_n`` SKU codes, most similar first.

    Raises:
        KeyError: If ``sku_input`` is not a key of ``data``.
    """
    reference = data[sku_input]

    # Keep only the records that share at least one attribute value.
    matches = {}
    for sku, record in data.items():
        matched = get_matched_indexes(reference, record)
        if matched:
            matches[sku] = matched

    def rank(sku):
        matched = matches[sku]
        return matched[0], -len(matched), matched

    # A single stable sort covers every attribute position, however many
    # attributes a record has, including matches that begin at the last one.
    return sorted(matches, key=rank)[:top_n]

# Rank the loaded SKUs against the chosen one, then print each returned SKU
# code followed by its attribute record.
similar_10 = check_key_val(sku_input, data)

for sku in similar_10:
    print(f"{sku} {data[sku]}")
	#!/usr/bin/env python
	# coding: utf-8

	# Here's a small Python task for you to solve. These are the instructions for writing your code. Feel free to use Google, but please be prepared to discuss the solution with me:
	#
	# When your code is run, it should read the input file my-test-data.json
	# and then ask the user to enter a SKU code in order to find its most similar SKUs.
	#
	# It will check for irregularities in the format of the inserted sku code and throw an exception if one was found.
	#
	# Then, it creates a matching score with all skus depending on digit importance
	# and prints out the 10 most similar skus.
	#
	# my-test-data.json input file is attached with this email.
	#
	# Good luck,
	# Nischal
	#
	# # Example
	#
	# The following will be the output if sku-123 is inserted as an input.
	#
	# sku-123: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-1"} is more similar to
	# sku-12507: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-12", "att-f": "att-f-5", "att-g": "att-g-6", "att-h": "att-h-8", "att-i": "att-i-15", "att-j": "att-j-8"} than to
	# sku-11992: {"att-a": "att-a-11", "att-b": "att-b-5", "att-c": "att-c-4", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-13", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-4"} than to
	# sku-954: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-13", "att-e": "att-e-1", "att-f": "att-f-10", "att-g": "att-g-3", "att-h": "att-h-15", "att-i": "att-i-6", "att-j": "att-j-3"} than to
	# sku-3613: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-1", "att-e": "att-e-13", "att-f": "att-f-5", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-10", "att-j": "att-j-8"} than to
	# sku-4196: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-5", "att-d": "att-d-13", "att-e": "att-e-7", "att-f": "att-f-9", "att-g": "att-g-1", "att-h": "att-h-11", "att-i": "att-i-13", "att-j": "att-j-4"} than to
	# sku-7956: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-12", "att-e": "att-e-7", "att-f": "att-f-7", "att-g": "att-g-1", "att-h": "att-h-6", "att-i": "att-i-1", "att-j": "att-j-13"} than to
	# sku-13091: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-10", "att-d": "att-d-14", "att-e": "att-e-7", "att-f": "att-f-1", "att-g": "att-g-12", "att-h": "att-h-6", "att-i": "att-i-12", "att-j": "att-j-1"} than to
	# sku-9697: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-3", "att-d": "att-d-4", "att-e": "att-e-3", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-12", "att-i": "att-i-6", "att-j": "att-j-2"} than to
	# sku-11470: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-15", "att-d": "att-d-3", "att-e": "att-e-2", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-11", "att-j": "att-j-13"} than to
	# sku-1023: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-13", "att-e": "att-e-8", "att-f": "att-f-10", "att-g": "att-g-12", "att-h": "att-h-2", "att-i": "att-i-8", "att-j": "att-j-1"}




	import json

	# Absolute path under one user's home directory; adjust it when running elsewhere.
	filename = '/Users/sohaibanwar/Downloads/my-test-data.json'
	# JSON text is UTF-8 by specification, so name the encoding explicitly instead
	# of relying on the platform's locale-dependent default.
	with open(filename, 'r', encoding='utf-8') as f:
		# presumably a mapping of SKU code -> attribute mapping; verify against the file
		data = json.load(f)





	from itertools import islice

	# SKU code whose closest matches the script looks up.
	sku_input = "sku-123"

	def take(n, iterable):
		"""Return the first ``n`` items of ``iterable`` as a list.

		Fewer than ``n`` items are returned when the iterable is shorter.
		"""
		return list(islice(iterable, n))

	def get_matched_indexes(record1, record2):
		"""Return the positions of ``record1`` keys whose values ``record2`` shares.

		Positions count the keys of ``record1`` in iteration order, starting at 0.
		A key qualifies only when ``record2`` also contains it with an equal value.
		"""
		return [
			position
			for position, key in enumerate(record1)
			if key in record2 and record1[key] == record2[key]
		]


	def check_key_val(sku_input, data, top_n=10):
		"""Return the SKU codes in ``data`` most similar to ``sku_input``.

		Every record in ``data`` (the reference record included) is compared with
		the reference record via ``get_matched_indexes``. Records sharing no
		attribute value with the reference are dropped; the others are ordered by

		1. the position of their first matching attribute, lowest first,
		2. the number of matching attributes, highest first,
		3. the list of matching positions, compared element by element.

		Records that are still tied keep the iteration order of ``data``.

		NOTE(review): the sample output in the task description ranks sku-11992
		(five matches, the first one at att-d) above SKUs with four matches
		starting at att-a, which this ordering does not produce -- confirm
		whether the match count is meant to outrank the position.

		Args:
			sku_input: Key of the reference record in ``data``.
			data: Mapping from SKU code to that SKU's attribute mapping.
			top_n: Maximum number of SKU codes to return.

		Returns:
			List of at most ``top_n`` SKU codes, most similar first.

		Raises:
			KeyError: If ``sku_input`` is not a key of ``data``.
		"""
		reference = data[sku_input]

		# Keep only the records that share at least one attribute value.
		matches = {}
		for sku, record in data.items():
			matched = get_matched_indexes(reference, record)
			if matched:
				matches[sku] = matched

		def rank(sku):
			matched = matches[sku]
			return matched[0], -len(matched), matched

		# A single stable sort covers every attribute position, however many
		# attributes a record has, including matches that begin at the last one.
		return sorted(matches, key=rank)[:top_n]

	# Rank the loaded SKUs against the chosen one, then print each returned SKU
	# code followed by its attribute record.
	similar_10 = check_key_val(sku_input , data)

	for i in similar_10:
		print(f"{i} {data[i]}")