Created
June 2, 2022 12:00
-
-
Save SohaibAnwaar/ac631be3ad33ec91437781db2c5bd357 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# coding: utf-8
# Here's a small Python task for you to solve. These are the instructions for writing your code. Feel free to use Google, but please be prepared to discuss the solution with me:
#
# When your code is run, it reads the input file my-test-data.json
# and then asks the user to enter a SKU code in order to find its most similar SKUs.
#
# It checks the format of the entered SKU code for irregularities and raises an exception if one is found.
#
# Then, it computes a matching score against all SKUs, depending on digit importance,
# and prints out the 10 most similar SKUs.
#
# The input file my-test-data.json is attached to this email.
#
# Good luck,
# Nischal
#
# # Example
#
# The following is the output when sku-123 is entered as the input.
#
# sku-123: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-1"} is more similar to | |
# sku-12507: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-1", "att-e": "att-e-12", "att-f": "att-f-5", "att-g": "att-g-6", "att-h": "att-h-8", "att-i": "att-i-15", "att-j": "att-j-8"} than to | |
# sku-11992: {"att-a": "att-a-11", "att-b": "att-b-5", "att-c": "att-c-4", "att-d": "att-d-1", "att-e": "att-e-7", "att-f": "att-f-13", "att-g": "att-g-1", "att-h": "att-h-8", "att-i": "att-i-7", "att-j": "att-j-4"} than to | |
# sku-954: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-7", "att-d": "att-d-13", "att-e": "att-e-1", "att-f": "att-f-10", "att-g": "att-g-3", "att-h": "att-h-15", "att-i": "att-i-6", "att-j": "att-j-3"} than to | |
# sku-3613: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-1", "att-e": "att-e-13", "att-f": "att-f-5", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-10", "att-j": "att-j-8"} than to | |
# sku-4196: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-5", "att-d": "att-d-13", "att-e": "att-e-7", "att-f": "att-f-9", "att-g": "att-g-1", "att-h": "att-h-11", "att-i": "att-i-13", "att-j": "att-j-4"} than to | |
# sku-7956: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-12", "att-e": "att-e-7", "att-f": "att-f-7", "att-g": "att-g-1", "att-h": "att-h-6", "att-i": "att-i-1", "att-j": "att-j-13"} than to | |
# sku-13091: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-10", "att-d": "att-d-14", "att-e": "att-e-7", "att-f": "att-f-1", "att-g": "att-g-12", "att-h": "att-h-6", "att-i": "att-i-12", "att-j": "att-j-1"} than to | |
# sku-9697: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-3", "att-d": "att-d-4", "att-e": "att-e-3", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-12", "att-i": "att-i-6", "att-j": "att-j-2"} than to | |
# sku-11470: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-15", "att-d": "att-d-3", "att-e": "att-e-2", "att-f": "att-f-10", "att-g": "att-g-1", "att-h": "att-h-3", "att-i": "att-i-11", "att-j": "att-j-13"} than to | |
# sku-1023: {"att-a": "att-a-2", "att-b": "att-b-13", "att-c": "att-c-8", "att-d": "att-d-13", "att-e": "att-e-8", "att-f": "att-f-10", "att-g": "att-g-12", "att-h": "att-h-2", "att-i": "att-i-8", "att-j": "att-j-1"} | |
import json
from itertools import islice

# Location of the JSON input file (hard-coded to the author's machine).
filename = '/Users/sohaibanwar/Downloads/my-test-data.json'

# Parse the whole file once at import time. JSON text is UTF-8, so the
# encoding is stated explicitly instead of relying on the locale default.
# NOTE(review): presumably a mapping of SKU code -> attribute dict, as in the
# example in the header comment; confirm against the actual file.
with open(filename, 'r', encoding='utf-8') as f:
    data = json.load(f)

# SKU code whose most similar SKUs are looked up when the script runs.
sku_input = "sku-123"
def take(n, iterable):
    """Collect at most the first ``n`` items of ``iterable`` into a list.

    Fewer than ``n`` items are returned when ``iterable`` runs out early.
    """
    leading_items = islice(iterable, n)
    return list(leading_items)
def get_matched_indexes(record1, record2):
    """Return the positions of ``record1``'s keys on which both records agree.

    A position is the key's index in ``record1``'s iteration order. It is
    reported when the same key is present in ``record2`` and both records
    hold an equal value for it. Positions come back in ascending order.
    """
    agreeing_positions = []
    for position, key in enumerate(record1):
        if key not in record2:
            continue
        if record1[key] != record2[key]:
            continue
        agreeing_positions.append(position)
    return agreeing_positions
def check_key_val(sku_input, data, limit=10):
    """Return the SKU codes in ``data`` that best match ``sku_input``.

    Every SKU in ``data`` is compared with the reference record
    ``data[sku_input]`` via ``get_matched_indexes``, which yields the
    positions of the reference attributes the two records agree on. SKUs
    that agree on nothing are discarded. The rest are ranked by:

    1. the position of their first matching attribute (earlier is better),
    2. the number of matching attributes (more is better),
    3. the list of matching positions, compared lexicographically.

    Remaining ties keep the iteration order of ``data``.

    The reference SKU is compared with itself as well, so it is part of
    the result (and ranks first whenever it has at least one attribute).

    Args:
        sku_input: Key of the reference SKU in ``data``.
        data: Mapping of SKU code to a mapping of attribute name to value.
        limit: Maximum number of SKU codes to return (default 10).

    Returns:
        A list of at most ``limit`` SKU codes, best match first.

    Raises:
        KeyError: If ``sku_input`` is not a key of ``data``.
    """
    reference = data[sku_input]

    # Matching positions per SKU; SKUs without a single match are left out.
    matches = {}
    for sku, attributes in data.items():
        matched = get_matched_indexes(reference, attributes)
        if matched:
            matches[sku] = matched

    def rank(sku):
        # One composite key replaces the former "group by first index, then
        # sort by length" passes. It also covers every attribute position,
        # where the old fixed range(0, 9) skipped SKUs whose first match was
        # at position 9 or later.
        matched = matches[sku]
        return matched[0], -len(matched), matched

    # sorted() is stable, so equally ranked SKUs stay in data order.
    return take(limit, sorted(matches, key=rank))
# Rank every SKU against the query SKU, then print each returned SKU code
# followed by its attribute record, one per line.
similar_10 = check_key_val(sku_input, data)
for i in similar_10:
    print(f"{i} {data[i]}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment