Skip to content

Instantly share code, notes, and snippets.

@pashri
Last active August 9, 2022 15:58
Show Gist options
  • Save pashri/cf0f52e3bdafd5c7d145ee782c350208 to your computer and use it in GitHub Desktop.
Save pashri/cf0f52e3bdafd5c7d145ee782c350208 to your computer and use it in GitHub Desktop.
Validate GSA unique entity identifier (UEI)
from functools import reduce
from itertools import starmap
import re
from typing import Iterable
def checksum(uei: str) -> bool:
    """Return whether the final character of a UEI matches its check digit.

    Each of the first 11 characters contributes
    ``(code point * position) % 10`` to a running total, with positions
    counted from 1. While the total has more than one decimal digit, its
    digits are folded again with the same positional weighting. The
    resulting single digit is compared with the 12th character.

    Letters are upper-cased before weighting because a UEI is not case
    sensitive, so ``'vn1ajfad19j9'`` and ``'VN1AJFAD19J9'`` agree.

    Args:
        uei: The 12-character identifier to check.

    Returns:
        ``True`` if the computed digit equals the final character.

    Raises:
        AssertionError: If ``uei`` is not exactly 12 characters long.
        ValueError: If the final character cannot be parsed by ``int``.
    """

    def weighted_sum(values: Iterable[int]) -> int:
        """Sum ``(value * position) % 10`` over ``values``, positions from 1."""
        return sum(
            (value * position) % 10
            for position, value in enumerate(values, 1)
        )

    # Raised explicitly rather than through an ``assert`` statement so the
    # length check is not stripped under ``python -O``; the exception type
    # is kept so existing callers see the same error.
    if len(uei) != 12:
        raise AssertionError("a UEI must be exactly 12 characters long")

    total = weighted_sum(map(ord, uei[:-1].upper()))
    # Fold multi-digit totals down to one digit using the same weighting.
    while total > 9:
        total = weighted_sum(int(digit) for digit in str(total))
    return total == int(uei[-1])
def uei_is_plausible(uei: str) -> bool:
    """Return `True` if a UEI is a plausible UEI, else `False`.

    This only checks the format. After running this function, an API
    should be called to check whether the UEI actually exists.

    A GSA unique entity identifier (UEI) has the following characteristics:

    - The Unique Entity ID is a 12-character, alphanumeric value.
    - The letters "O" and "I" are not used to avoid confusion with zero
      and one.
    - The first character is not zero to avoid cutting off digits that can
      occur during data imports, for example, when importing data into
      spreadsheet programs.
    - Nine-digit sequences are not used in the identifier to avoid
      collision with the nine-digit DUNS Number or Taxpayer Identification
      Number (TIN).
    - The first five characters are structured to avoid collision with the
      Commercial and Government Entity code formatting or CAGE code.
      (This rule is not checked here.)
    - The Unique Entity ID is not case sensitive.
    - The final character is a checksum of the first 11 characters.
      Checksums are used to detect errors within data.

    Examples:
        uei_is_plausible('VN1AJFAD19J9')  # Valid
        uei_is_plausible('99999999f995')  # Valid
        uei_is_plausible('ABCDEF12345')   # Invalid: too few characters
        uei_is_plausible('io10io10io19')  # Invalid: contains 'I' and/or 'O'
        uei_is_plausible('123456789FF4')  # Invalid: too many consecutive digits
        uei_is_plausible('A12345678901')  # Invalid: nine digits, not at the start
        uei_is_plausible('1A2B3C4D5F6G')  # Invalid: incorrect checksum

    See also: GSA implementation at https://github.com/GSA-TTS/uei-js/

    Args:
        uei: The candidate identifier; it is converted with ``str()`` and
            upper-cased before checking.

    Returns:
        ``True`` when every format rule checked here passes, else ``False``.
    """
    # The Unique Entity ID is not case sensitive
    uei = str(uei).upper()

    # The Unique Entity ID is a 12-character, alphanumeric value
    if len(uei) != 12 or not (uei.isalnum() and uei.isascii()):
        return False

    # The letters "O" and "I" are not included
    if 'O' in uei or 'I' in uei:
        return False

    # The first character is not zero
    if uei[0] == '0':
        return False

    # Nine-digit sequences are not used. `re.search` scans the whole
    # string; `re.match` would only catch a run at the very start.
    if re.search(r'\d{9}', uei):
        return False

    # The final character is a checksum of the first 11 characters.
    # The digit test short-circuits so `checksum` never sees a letter there.
    return uei[-1].isdigit() and checksum(uei)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment