Skip to content

Instantly share code, notes, and snippets.

@chenha0
Last active December 30, 2015 07:59
Show Gist options
  • Save chenha0/7799545 to your computer and use it in GitHub Desktop.
Save chenha0/7799545 to your computer and use it in GitHub Desktop.
Find the words common to the top-`limit` entries of several word-frequency files
import sys
def parse(filename, limit):
    """Return the set of the `limit` highest-scoring words in a file.

    Each non-blank line of `filename` is split on whitespace; the first
    field is taken as the word and the second as its numeric score.
    Any further fields are ignored.

    Args:
        filename: Path of the file to read.
        limit: Maximum number of top-scoring words to keep. If it exceeds
            the number of entries, every word is returned. Expected to be
            non-negative.

    Returns:
        A set with the words of the `limit` highest-scoring entries.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field is not a valid float.
    """
    freqs = []
    with open(filename) as f:
        for line in f:
            fields = line.split()
            # Blank / whitespace-only lines (e.g. a trailing newline at the
            # end of the file) carry no entry; skip them instead of crashing.
            if not fields:
                continue
            freqs.append((float(fields[1]), fields[0]))
    # Sorting (score, word) tuples in descending order breaks score ties by
    # word, also descending. Slicing already clamps `limit` to the list size.
    top = sorted(freqs, reverse=True)[:limit]
    return {word for _, word in top}
def main(argv):
    """Print the words shared by the top-`limit` entries of every file.

    Args:
        argv: Command-line argument list in `sys.argv` form:
            [program, limit, filename1, filename2, ...].

    Returns:
        Process exit status: 0 on success, 1 when too few arguments
        were supplied.

    Raises:
        ValueError: If the limit argument is not an integer.
    """
    if len(argv) < 4:
        sys.stderr.write(
            "Usage: python inter.py limit filename1 filename2 ... \n")
        # Stop here: continuing without the required arguments would only
        # fail later with an unrelated error.
        return 1

    limit = int(argv[1])
    word_sets = [parse(filename, limit) for filename in argv[2:]]
    # The guard above guarantees at least two sets.
    common = set.intersection(*word_sets)
    for word in common:
        # Single-argument call form works as both the Python 2 print
        # statement and the Python 3 print function.
        print(word)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment