Skip to content

Instantly share code, notes, and snippets.

@Loveforkeeps
Created January 18, 2018 08:08
Show Gist options
  • Save Loveforkeeps/2f1b97b44611c962ad07af7300ac0664 to your computer and use it in GitHub Desktop.
Save Loveforkeeps/2f1b97b44611c962ad07af7300ac0664 to your computer and use it in GitHub Desktop.
文件按行去重
# encoding:utf-8
import io
import sys
import os
def uniq_set(file):
    """Remove duplicate and empty lines from ``file``, rewriting it in place.

    The whole file is read into memory and split into lines; it is then
    overwritten with one newline-terminated entry per distinct non-empty
    line.  Lines are kept in the order of their first occurrence, so the
    result is deterministic from run to run.  A confirmation message is
    printed once the file has been rewritten.

    Args:
        file: Path of the text file to deduplicate.  It is opened with
            ``io.open`` without an explicit encoding.
    """
    seen = set()
    ordered = []
    with io.open(file, "r") as src:
        for line in src.read().splitlines():
            # Blank lines are dropped; repeats of an earlier line are skipped.
            if line == "" or line in seen:
                continue
            seen.add(line)
            ordered.append(line)
    # The input has been fully read and closed before the same path is
    # reopened for writing (which truncates it).
    with io.open(file, "w") as dst:
        # write() rather than writelines(): writelines() given a single
        # string would iterate over it one character at a time.
        dst.write(u"".join(line + u"\n" for line in ordered))
    print(u"%s :deduplicate succesful!" % file)
def uniq_set2(file):
    """Write the distinct lines of ``file`` to a sibling ``file + "_dedup"``.

    The input is streamed line by line and left unmodified.  Each line is
    written to the output the first time it is seen, so the original order
    is preserved.  Unlike ``uniq_set``, empty lines are not dropped: the
    first empty line is kept like any other line.

    Args:
        file: Path of the text file to read.  The result is written to the
            path formed by appending ``"_dedup"`` to it.
    """
    lines_seen = set()
    # The input is opened first so that a missing/unreadable input does not
    # leave an empty "_dedup" file behind; both handles are closed on exit,
    # including when an exception is raised part-way through.
    with open(file, "r") as infile, open(file + "_dedup", "w") as outfile:
        for line in infile:
            # Compare without the terminator so that a final line lacking a
            # trailing newline still matches an identical earlier line.
            key = line.rstrip("\n")
            if key not in lines_seen:
                lines_seen.add(key)
                outfile.write(line)
def main():
    """Command-line entry point: deduplicate the single file named in argv.

    Exactly one command-line argument (the file path) is expected.  With any
    other argument count a usage hint is printed and nothing else is done.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(u"Please asign a file!")
        return
    uniq_set(cli_args[0])
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment