An algorythm to compare SQL Primary keys and sync A -> B.
# -*- coding: utf-8 -*- | |
import random | |
def compare(all_read_pks, all_write_pks): | |
"""This function emulates a mathematical problem i try to solve. | |
I have primary keys from a SQL Table A and primary_keys in SQL Table B. | |
This function should compare B with A and sync all primary_keys changes to B. | |
The biggest problem is here that i have to compare numbers in blocks and compare | |
a single block until its fully synced. | |
""" # noqa | |
block_size = 100 | |
compared = 0 | |
to_compare = len(all_read_pks) | |
if len(all_write_pks) > len(all_read_pks): | |
to_compare = len(all_write_pks) | |
while compared < to_compare: | |
read_pks = set(list(all_read_pks)[compared:compared + block_size]) | |
write_pks = set(list(all_write_pks)[compared:compared + block_size]) | |
if read_pks == write_pks: | |
compared += block_size | |
continue | |
matching = read_pks & write_pks | |
for i in write_pks - matching: | |
all_write_pks.remove(i) | |
for i in read_pks - write_pks: | |
all_write_pks.add(i) | |
if read_pks == write_pks: | |
compared += len(read_pks) | |
continue | |
return all_read_pks == all_write_pks | |
if __name__ == '__main__': | |
seta = set(range(1000)) | |
setb = set(range(1000)) | |
# Remove random rows from both sets "tables" | |
numbers1 = set(random.sample(range(1000), 500)) | |
numbers2 = set(random.sample(range(500, 1000), 250)) | |
# Compare with 500 random rows removed from both tables. | |
print("Remove in both is: ", compare(seta - numbers1, setb - numbers2)) | |
# Compare with 500 random rows removed from source only | |
print("Remove in source is: ", compare(seta - numbers1, setb)) | |
# Compare with 500 random rows removed from target only | |
print("Remove in target is: ", compare(seta, setb - numbers2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment