Skip to content

Instantly share code, notes, and snippets.

@SohaibAnwaar
Created June 2, 2022 12:00
Show Gist options
  • Save SohaibAnwaar/ac631be3ad33ec91437781db2c5bd357 to your computer and use it in GitHub Desktop.
Save SohaibAnwaar/ac631be3ad33ec91437781db2c5bd357 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
# Here's a small Python task for you to solve. These are the instructions for writing your code. Feel free to use Google, but please be prepared to discuss the solution with me:
#
# When your code is run, it reads the input file my-test-data.json
# and then asks the user to enter a sku code in order to find its most similar skus.
#
# It checks the entered sku code for format irregularities and raises an exception if one is found.
#
# Then, it creates a matching score with all skus depending on digit importance
# and prints out the 10 most similar skus.
#
# The my-test-data.json input file is attached to this email.
#
# Good luck,
# Nischal
#
# # Example
#
# The following will be the output if sku-123 is inserted as an input.
#
# sku-123: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-1"} is more similar to
# sku-12507: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-12", "att-f": "att-f-5", "att-g": "att-g-6", "att-h": "att-h-8", "att-i": "att-i-15", "att-j": "att-j-8"} than to
# sku-11992: {"att-a": "att-a-11", "att-b": "att-b-5", "att-c": "att-c-4", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-13", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-4"} than to
# sku-954: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-13", "att-e": "att-e-1", "att-f": "att-f-10", "att-g": "att-g-3", "att-h": "att-h-15", "att-i": "att-i-6", "att-j": "att-j-3"} than to
# sku-3613: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-1", "att-e": "att-e-13", "att-f": "att-f-5", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-10", "att-j": "att-j-8"} than to
# sku-4196: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-5", "att-d": "att-d-13", "att-e": "att-e-7", "att-f": "att-f-9", "att-g": "att-g-1", "att-h": "att-h-11", "att-i": "att-i-13", "att-j": "att-j-4"} than to
# sku-7956: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-12", "att-e": "att-e-7", "att-f": "att-f-7", "att-g": "att-g-1", "att-h": "att-h-6", "att-i": "att-i-1", "att-j": "att-j-13"} than to
# sku-13091: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-10", "att-d": "att-d-14", "att-e": "att-e-7", "att-f": "att-f-1", "att-g": "att-g-12", "att-h": "att-h-6", "att-i": "att-i-12", "att-j": "att-j-1"} than to
# sku-9697: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-3", "att-d": "att-d-4", "att-e": "att-e-3", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-12", "att-i": "att-i-6", "att-j": "att-j-2"} than to
# sku-11470: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-15", "att-d": "att-d-3", "att-e": "att-e-2", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-11", "att-j": "att-j-13"} than to
# sku-1023: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-13", "att-e": "att-e-8", "att-f": "att-f-10", "att-g": "att-g-12", "att-h": "att-h-2", "att-i": "att-i-8", "att-j": "att-j-1"}
import json
# Location of the JSON file that maps each SKU key to its attribute mapping.
filename = '/Users/sohaibanwar/Downloads/my-test-data.json'
# JSON text is UTF-8; pass the encoding explicitly instead of relying on the
# platform default used by open().
with open(filename, 'r', encoding='utf-8') as f:
    data = json.load(f)
from itertools import islice
# SKU key whose most similar SKUs are looked up by this script.
# NOTE(review): the task description asks for this value to be entered by the
# user and checked for format irregularities; it is currently hard-coded.
sku_input = "sku-123"
def take(n, iterable):
    """Return the first ``n`` items of ``iterable`` as a list.

    Fewer than ``n`` items are returned when the iterable runs out first.
    """
    return list(islice(iterable, n))
def get_matched_indexes(record1, record2):
    """Return the positions of ``record1`` keys whose values match ``record2``.

    A position is the key's index in ``record1``'s iteration order. A key
    counts as matched only when it is also present in ``record2`` and both
    records hold equal values for it.
    """
    return [
        position
        for position, key in enumerate(record1)
        if key in record2 and record1[key] == record2[key]
    ]
def check_key_val(sku_input, data):
    """Return the keys of up to 10 SKUs most similar to ``sku_input``.

    Args:
        sku_input: Key of the reference SKU in ``data``.
        data: Mapping of SKU key to that SKU's attribute mapping.

    Returns:
        A list of at most 10 SKU keys, best match first. The reference SKU
        itself and SKUs that share no attribute value with it are left out.

    Raises:
        KeyError: If ``sku_input`` is not a key of ``data``.
    """
    reference = data[sku_input]

    matches = {}
    for sku, attributes in data.items():
        if sku == sku_input:
            # The reference trivially matches itself on every attribute and
            # would otherwise always occupy the first result slot.
            continue
        matched = get_matched_indexes(reference, attributes)
        if matched:
            matches[sku] = matched

    # More matching attributes rank first; among equal counts, matches on
    # earlier (more important) attribute positions win, because the position
    # lists compare lexicographically. This is the ordering shown in the
    # example output of the task description. sorted() is stable, so any
    # remaining ties keep the order in which the SKUs appear in ``data``.
    ranked = sorted(matches, key=lambda sku: (-len(matches[sku]), matches[sku]))
    return take(10, ranked)
# Look up the SKUs closest to the chosen one and print each key followed by
# its attribute mapping, best match first.
similar_10 = check_key_val(sku_input, data)
for sku in similar_10:
    print(f"{sku} {data[sku]}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment