Skip to content

Instantly share code, notes, and snippets.

@abuggia
Created March 22, 2012 13:54
Show Gist options
  • Save abuggia/2158478 to your computer and use it in GitHub Desktop.
Save abuggia/2158478 to your computer and use it in GitHub Desktop.
def process_file(name, f):
    """Run every extractor over each line of *f* and return what they found.

    Args:
        name: Passed unchanged as the first argument to each extractor.
        f: Iterable yielding the lines to scan (for example an open text
            file).

    Returns:
        The list handed to the extractors as their third argument. It
        starts empty and holds whatever they added to it.
    """
    # NOTE(review): add_emails and add_phone_numbers are defined outside the
    # visible part of this file; presumably they append to ``res`` in place
    # the same way add_emails2 does -- confirm.
    res = []
    for line in f:
        add_emails(name, line, res)
        add_emails2(name, line, res)
        add_phone_numbers(name, line, res)
    return res
# ...
# ...
# "@stanford.edu" with a regex "." inserted between every pair of adjacent
# characters, so it matches the domain written with exactly one arbitrary
# character between letters (e.g. "@-s-t-a-n-f-o-r-d-.-e-d-u").  The "." that
# is part of ".edu" is left unescaped and therefore also acts as a wildcard.
stanford = '.'.join('@stanford.edu')
# Case-insensitive; the single group captures the run of non-whitespace
# characters that ends with the spread-out domain.  Written as a raw string so
# that ``\S`` reaches the regex engine without being an invalid string escape.
email2_pattern = r'(?i)([\S]+%s)' % (stanford)
def add_emails2(file_name, line, res):
    """Append the first separator-obfuscated address found in *line* to *res*.

    The line is searched with ``email2_pattern``. Only the first match is
    used. If ``repeated_char`` reports a character that fills every other
    position of the match, all occurrences of that character are removed
    before the address is recorded.

    Args:
        file_name: Stored as the first element of the appended tuple.
        line: Text to search.
        res: List that receives a ``(file_name, 'e', email)`` tuple; it is
            modified in place.

    Returns:
        None.
    """
    matches = re.findall(email2_pattern, line)
    if not matches:
        return

    email = matches[0]
    separator = repeated_char(email)
    if separator:
        # Remove the separator as literal text. Passing it to re.sub as a
        # pattern misbehaves for regex metacharacters: "." would erase the
        # whole address and "*" or "+" would raise re.error.
        email = email.replace(separator, '')

    # NOTE(review): the original indentation was lost; the append is assumed
    # to run for every match, whether or not a separator was detected --
    # confirm against the intended behaviour.
    res.append((file_name, 'e', email))
def repeated_char(string):
    """Return the character that fills every other position of *string*.

    Even positions (0, 2, 4, ...) are checked first, then odd positions
    (1, 3, 5, ...). The first group whose characters are all equal gives the
    result.

    Returns:
        The repeated character, or ``''`` when *string* has two or fewer
        characters or neither group is uniform.
    """
    chars = list(string)
    if len(chars) <= 2:
        return ''

    if all_eq(chars[::2]):
        return chars[0]
    if all_eq(chars[1::2]):
        return chars[1]
    return ''
def all_eq(arr):
    """Report whether *arr* contains at most one distinct value.

    An empty iterable counts as all-equal. Elements are collected into a
    set, so they must be hashable.
    """
    distinct = set(arr)
    return len(distinct) < 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment