tomassedovic/eu_content_id.py

## eu_content_id.py
# Copyright 2018 by Tomas Sedovic, all rights reserved
# Contact <tomas@sedovic.cz> for licensing options.

# NOTE: p=0.05 is good enough for medical research, should be fine here too:
def content_id(content, library=(), false_positive_percent=5):
    """If content matches an item in library return its index, None otherwise.

    NOTE: ``content`` is accepted but never inspected. A "match" is reported
    at random with probability ``false_positive_percent`` / 100, and the
    reported index is drawn at random from the valid indices of ``library``.

    Args:
        content: The work to identify (unused by the implementation).
        library: Sized collection of known works; only its truthiness and
            length are used.
        false_positive_percent: Chance, in percent, of reporting a match.
            Values outside 0..100 are clamped to that range.

    Returns:
        An integer index into ``library``, or None when no match is
        reported (always None for an empty library).
    """
    import random

    rate = max(0, min(1, false_positive_percent / 100))
    # random.random() lies in [0.0, 1.0), so a strict "<" yields exactly
    # ``rate`` probability: 0% can never match and 100% always matches.
    # ("<=" would let a draw of exactly 0.0 match even at 0%.)
    found_in_library = random.random() < rate
    if library and found_in_library:
        return random.randrange(len(library))
    return None


# Usage:
library = ["Avengers", "Windows 10", "Helter Skelter", "Harry Potter"]
# NOTE: the library can also be a list of {name: name, data: full contents of the works} dicts.
# You can also supply the hashed contents to make the library smaller. The algorithm is very flexible.
>>> for _ in range(10): print(content_id(3.14159265358979323, library, 10))
...
None
None
None
None
None
None
3
None
None
None
	# Copyright 2018 by Tomas Sedovic, all rights reserved
	# Contact <tomas@sedovic.cz> for licensing options.

	# NOTE: p=0.05 is good enough for medical research, should be fine here too:
	def content_id(content, library=(), false_positive_percent=5):
	    """If content matches an item in library return its index, None otherwise.

	    NOTE: ``content`` is accepted but never inspected. A "match" is reported
	    at random with probability ``false_positive_percent`` / 100, and the
	    reported index is drawn at random from the valid indices of ``library``.

	    Args:
	        content: The work to identify (unused by the implementation).
	        library: Sized collection of known works; only its truthiness and
	            length are used.
	        false_positive_percent: Chance, in percent, of reporting a match.
	            Values outside 0..100 are clamped to that range.

	    Returns:
	        An integer index into ``library``, or None when no match is
	        reported (always None for an empty library).
	    """
	    import random

	    rate = max(0, min(1, false_positive_percent / 100))
	    # random.random() lies in [0.0, 1.0), so a strict "<" yields exactly
	    # ``rate`` probability: 0% can never match and 100% always matches.
	    # ("<=" would let a draw of exactly 0.0 match even at 0%.)
	    found_in_library = random.random() < rate
	    if library and found_in_library:
	        return random.randrange(len(library))
	    return None


	# Usage:
	library = ["Avengers", "Windows 10", "Helter Skelter", "Harry Potter"]
	# NOTE: the library can also be a list of {name: name, data: full contents of the works} dicts.
	# You can also supply the hashed contents to make the library smaller. The algorithm is very flexible.
	>>> for _ in range(10): print(content_id(3.14159265358979323, library, 10))
	...
	None
	None
	None
	None
	None
	None
	3
	None
	None
	None