Skip to content

Instantly share code, notes, and snippets.

@alexkay
Created January 12, 2012 14:08
Show Gist options
  • Save alexkay/1600704 to your computer and use it in GitHub Desktop.
Save alexkay/1600704 to your computer and use it in GitHub Desktop.
Code Sprint 2 - Fraud Prevention
#!/usr/bin/env python
import re
def normalise_email(email):
    """Return a canonical form of *email* so equivalent addresses compare equal.

    The whole address is lower-cased.  In the part before the first "@",
    everything from the first "+" onwards is dropped and every "." is
    removed.  The "@" and whatever follows it are kept unchanged (apart
    from the lower-casing).

    An input that contains no "@" is treated as a bare local part: the same
    "+"/"." rules are applied to the entire string and nothing is appended.

    >>> normalise_email("John.Doe+spam@Example.COM")
    'johndoe@example.com'
    """
    # partition() yields empty separator/domain when "@" is absent, whereas
    # find() would return -1 and silently split off the last character.
    local, at, domain = email.lower().partition("@")
    local = local.partition("+")[0].replace(".", "")
    return local + at + domain
def normalise_address(street, city, state, code):
    """Return a canonical "street,city,state,code" string for an address.

    * street: stripped, lower-cased, runs of whitespace collapsed to one
      space; a trailing " street" or " st." becomes " st" and a trailing
      " road" or " rd." becomes " rd".
    * city: stripped, lower-cased, runs of whitespace collapsed.
    * state: stripped and lower-cased; "california", "illinois" and
      "new york" are replaced by "ca", "il" and "ny".
    * code: stripped only.
    """
    state_codes = {"california": "ca", "illinois": "il", "new york": "ny"}

    street = re.sub(r"\s+", " ", street.strip().lower())
    # The dot must be escaped: a bare "." matches any character, which would
    # truncate unrelated endings such as " sty" or " rdx" as well.
    street = re.sub(r" (street|st\.)$", " st", street)
    street = re.sub(r" (road|rd\.)$", " rd", street)

    city = re.sub(r"\s+", " ", city.strip().lower())

    state = state.strip().lower()
    state = state_codes.get(state, state)

    return ",".join((street, city, state, code.strip()))
def process(oid, did, key, cc, stash, orders):
    """Record one order and flag it (and earlier ones) if it looks fraudulent.

    ``stash`` maps "<deal id>,<key>" to a tuple ``(first_cc, pending)``
    where ``first_cc`` is the card seen on the first order for that
    combination and ``pending`` is the set of order ids seen so far that
    have not been flagged.  ``orders`` is the set of flagged order ids.
    Both containers are mutated in place; nothing is returned.

    An empty ``pending`` set doubles as the marker "fraud was already
    detected for this combination".
    """
    slot = ",".join((str(did), key))

    # First sighting of this deal/key pair: just remember it.
    if slot not in stash:
        stash[slot] = (cc, {oid})
        return

    first_cc, pending = stash[slot]

    # Same card and no fraud detected so far: keep the order around in case
    # a later order with a different card incriminates all of them.
    if cc == first_cc and pending:
        pending.add(oid)
        return

    # Either a different card showed up, or this pair was already tainted by
    # an earlier mismatch.  Flag the current and all remembered orders, then
    # empty the set so that every future order for this pair is flagged
    # regardless of its card.  (When the set is already empty the last two
    # calls do nothing.)
    orders.add(oid)
    orders.update(pending)
    pending.clear()
def main():
    """Read order records from standard input and print the flagged order ids.

    The first input line holds the number of records.  Each following line
    carries eight comma-separated fields, unpacked in this order: order id,
    deal id, email, street, city, state, code, cc.  Every record is checked
    twice with process(): once keyed by the normalised email and once keyed
    by the normalised address.

    The flagged order ids are printed on one line, ascending and separated
    by commas.  The body is written to run under both Python 2 and 3.
    """
    import sys

    read_line = sys.stdin.readline
    emails = {}
    addresses = {}
    orders = set()

    # Parse the count with int(); Python 2's input() would eval() the line,
    # executing arbitrary expressions supplied on stdin.
    count = int(read_line())
    for _ in range(count):
        fields = read_line().rstrip("\r\n").split(",")
        oid, did, email, street, city, state, code, cc = fields
        oid, did = int(oid), int(did)
        email = normalise_email(email)
        address = normalise_address(street, city, state, code)
        process(oid, did, email, cc, emails, orders)
        process(oid, did, address, cc, addresses, orders)

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3).
    print(",".join(str(i) for i in sorted(orders)))
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment