Last active
June 5, 2022 20:46
-
-
Save j2abro/9dfa4f08e6772b127ab25d4f566d9fd8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from collections import Counter

# Count unique digital currency addresses on the OFAC SDN list.
# https://www.treasury.gov/ofac/downloads/sdnlist.txt
# https://home.treasury.gov/policy-issues/financial-sanctions/specially-designated-nationals-and-blocked-persons-list-sdn-human-readable-lists
# https://home.treasury.gov/policy-issues/financial-sanctions/faqs/topic/1626

sdn_filename = 'sdn.txt'

# Raw strings so that `\s` is a regex escape rather than a (deprecated) string
# escape. The address character class is `A-Z`, not `A-z`: the latter also
# matches the punctuation characters [ \ ] ^ _ ` that sit between 'Z' and 'a'.
pattern_address_and_sybmol = r"Digital Currency Address\s-\s[A-Z]{3,6}\s[a-zA-Z0-9]+"
# The capture group makes findall() return the bare symbol, without the
# whitespace that delimits it.
pattern_symbol_only = r"\s([A-Z]{3,6})\s"

_ADDRESS_RE = re.compile(pattern_address_and_sybmol, flags=re.IGNORECASE)
_SYMBOL_RE = re.compile(pattern_symbol_only)


def split_entities(text):
    """Split the SDN list text into one single-line string per entity.

    Entities are separated by an empty line; the line breaks inside each
    entity are replaced with single spaces.
    """
    return [' '.join(paragraph.splitlines()) for paragraph in text.split("\n\n")]


def find_addresses(entities):
    """Return every 'Digital Currency Address - SYM addr' match, in order.

    Duplicates are kept so the caller can report both the total and the
    unique counts.
    """
    matches = []
    for entity in entities:
        matches.extend(_ADDRESS_RE.findall(entity))
    return matches


def count_symbols(unique_addresses):
    """Count how many of the given address matches belong to each symbol.

    Only upper-case symbols are counted, because the symbol pattern is
    applied case-sensitively.
    """
    return Counter(
        symbol
        for address in unique_addresses
        for symbol in _SYMBOL_RE.findall(address)
    )


def main():
    """Read the SDN list and print per-symbol unique address counts."""
    with open(sdn_filename) as f:
        sdn_entity = split_entities(f.read())

    all_matches = find_addresses(sdn_entity)
    # Addresses are only needed to get the unique count; symbols are tallied
    # from the de-duplicated list.
    all_matches_uniq = sorted(set(all_matches))
    symbol_counts = count_symbols(all_matches_uniq)

    # Print output for twitter, most frequent symbol first.
    print('Unique addresses on US Treasury OFAC sanction list')
    for name, count in symbol_counts.most_common():
        print(name, '\t', count)
    print('Number of SDN addrs: ', len(all_matches))
    print('Number of uniq SDN addrs: ', len(all_matches_uniq))
    print('@USTreasury #OFAC #AML #KYC')  # twitter


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment