Skip to content

Instantly share code, notes, and snippets.

@mikaelj
Last active November 22, 2015 09:51
Show Gist options
  • Save mikaelj/1ceb099cff15c8944250 to your computer and use it in GitHub Desktop.
Save mikaelj/1ceb099cff15c8944250 to your computer and use it in GitHub Desktop.
Print out duplicate entries
## untested!
def main():
    """Print every line that occurs more than once in a file.

    The file name is taken from the first command-line argument. If the
    argument is missing or the file cannot be opened, a usage message is
    printed and the function returns without doing anything else.

    For each distinct line seen at least twice, one line of the form
    "<line>: <count> occurances" is printed. Lines are compared without
    their trailing newline, so a final line that lacks one still matches
    its earlier copies.
    """
    # Imported locally because the script has no module-level import block.
    import sys

    try:
        # Path first, mode second.
        f = open(sys.argv[1], "r")
    except (IndexError, IOError):
        # IndexError: no argument given; IOError: file could not be opened.
        print("usage: %s file" % (sys.argv[0]))
        return

    entries = {}
    with f:
        for line in f:
            entry = line.rstrip("\n")
            entries[entry] = entries.get(entry, 0) + 1

    for entry, count in entries.items():
        if count > 1:
            print("%s: %d occurances" % (entry, count))
# Entry point of the first script: runs unconditionally when the file is loaded.
main()
#!/usr/bin/env python
def hash(r):
    """Pack a six-character code 'ABCxyz' into a single integer.

    The first three characters are letters and the last three are decimal
    digits. Each letter is stored as its offset from 'A' in a 5-bit field
    (the original note gives the letter range as 0..28 -- TODO confirm;
    'A'..'Z' map to 0..25). The three digits form a number 0..999 that
    fits in the low 10 bits. Resulting layout:

        A<<20 | B<<15 | C<<10 | xyz

    Characters are not range-checked or masked, so anything outside
    'A'..'Z' produces a value that spills out of its field.

    Note: within this module the name shadows the builtin hash().
    """
    assert len(r) == 6
    origin = ord('A')
    first, second, third = [ord(ch) - origin for ch in r[:3]]
    number = int(r[3]) * 100 + int(r[4]) * 10 + int(r[5])
    letters = (first << 20) | (second << 15) | (third << 10)
    return letters | number
def main():
    """Report whether "regnr.txt" contains a repeated entry.

    The first six characters of each non-blank line are passed to hash()
    and the result is used as a direct index into a table of counters.
    Scanning stops at the first index that is hit twice. Prints
    "duplicates" if a repeat was found and prints nothing otherwise.
    """
    # One single-byte counter per possible 25-bit hash value. A bytearray
    # needs one byte per slot, where a list of ints needs a pointer per slot.
    # The scan stops at the first count of 2, so a byte can never overflow.
    counts = bytearray(1 << 25)
    duplicates = False
    # The with-block closes the file even if hash() raises on a bad line.
    with open("regnr.txt", "r") as f:
        for line in f:
            if not line.strip():
                # Blank lines (e.g. a trailing empty line) hold no entry.
                continue
            h = hash(line[:6])
            counts[h] += 1
            if counts[h] > 1:
                duplicates = True
                break
    if duplicates:
        print("duplicates")
# Run the duplicate check only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment