Skip to content

Instantly share code, notes, and snippets.

@pcdummy pcdummy/sql_sync_algo.py
Last active Dec 16, 2016

Embed
What would you like to do?
An algorythm to compare SQL Primary keys and sync A -> B.
# -*- coding: utf-8 -*-
import random
def compare(all_read_pks, all_write_pks):
"""This function emulates a mathematical problem i try to solve.
I have primary keys from a SQL Table A and primary_keys in SQL Table B.
This function should compare B with A and sync all primary_keys changes to B.
The biggest problem is here that i have to compare numbers in blocks and compare
a single block until its fully synced.
""" # noqa
block_size = 100
compared = 0
to_compare = len(all_read_pks)
if len(all_write_pks) > len(all_read_pks):
to_compare = len(all_write_pks)
while compared < to_compare:
read_pks = set(list(all_read_pks)[compared:compared + block_size])
write_pks = set(list(all_write_pks)[compared:compared + block_size])
if read_pks == write_pks:
compared += block_size
continue
matching = read_pks & write_pks
for i in write_pks - matching:
all_write_pks.remove(i)
for i in read_pks - write_pks:
all_write_pks.add(i)
if read_pks == write_pks:
compared += len(read_pks)
continue
return all_read_pks == all_write_pks
if __name__ == '__main__':
seta = set(range(1000))
setb = set(range(1000))
# Remove random rows from both sets "tables"
numbers1 = set(random.sample(range(1000), 500))
numbers2 = set(random.sample(range(500, 1000), 250))
# Compare with 500 random rows removed from both tables.
print("Remove in both is: ", compare(seta - numbers1, setb - numbers2))
# Compare with 500 random rows removed from source only
print("Remove in source is: ", compare(seta - numbers1, setb))
# Compare with 500 random rows removed from target only
print("Remove in target is: ", compare(seta, setb - numbers2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.