Skip to content

Instantly share code, notes, and snippets.

@jchaffin
Created April 17, 2018 01:38
Show Gist options
  • Save jchaffin/e1f7ef8f27e389a30adf1a270743c0d0 to your computer and use it in GitHub Desktop.
Save jchaffin/e1f7ef8f27e389a30adf1a270743c0d0 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
"""
Copyright 2017 Jacob Chaffin
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Please see <http://www.gnu.org/licenses/> for a copy of the license.
"""
import random, sys, locale, string
from argparse import ArgumentParser, FileType
def lines(filecontents):
    """Return the lines of an open text file as a list of strings.

    Only the trailing newline of each line is removed.  Leading and
    trailing spaces or tabs are part of the line and are kept, so two
    lines that differ only in whitespace stay distinct when compared.

    Args:
        filecontents: An open text file (or any iterable yielding lines).

    Returns:
        A list with one string per input line, without line terminators.
    """
    return [line.rstrip('\n') for line in filecontents]
def _classify_sorted(first, second):
    """Yield ``(column, line)`` pairs for a sorted, comm-style comparison.

    Both inputs are sorted (plain string ordering) and then merged, so the
    output is in sorted order even if the inputs were not.  Column 1 means
    the line is only in ``first``, column 2 only in ``second`` and column 3
    in both.  Duplicates are paired one-to-one: a line occurring twice in
    ``first`` and once in ``second`` yields one column-3 and one column-1
    entry.
    """
    left = sorted(first)
    right = sorted(second)
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] == right[j]:
            yield 3, left[i]
            i += 1
            j += 1
        elif left[i] < right[j]:
            yield 1, left[i]
            i += 1
        else:
            yield 2, right[j]
            j += 1
    # At most one of the two tails is non-empty.
    for line in left[i:]:
        yield 1, line
    for line in right[j:]:
        yield 2, line


def _classify_unsorted(first, second):
    """Yield ``(column, line)`` pairs without requiring sorted input.

    Lines of ``first`` are reported in their original order: column 3 when
    an as-yet-unmatched copy of the line exists in ``second``, column 1
    otherwise.  The lines of ``second`` that were never matched follow, in
    their original order, as column 2.
    """
    available = {}
    for line in second:
        available[line] = available.get(line, 0) + 1

    matched = {}
    for line in first:
        if available.get(line, 0) > 0:
            available[line] -= 1
            matched[line] = matched.get(line, 0) + 1
            yield 3, line
        else:
            yield 1, line

    # A match consumes the earliest copies in ``second``; later copies
    # beyond the matched count are unique to ``second``.
    for line in second:
        if matched.get(line, 0) > 0:
            matched[line] -= 1
        else:
            yield 2, line


def main(argv=None):
    """Compare two files line by line and print a three-column report.

    Column 1 holds lines unique to FILE1, column 2 lines unique to FILE2
    and column 3 lines present in both.  Each printed column is indented
    by one tab for every lower-numbered column that is not suppressed.

    Args:
        argv: Optional list of command-line arguments.  When ``None``
            (the default) the arguments are taken from ``sys.argv``.
    """
    description_msg = "compare two sorted files line by line"
    parser = ArgumentParser(description=description_msg)
    # Positional Arguments
    parser.add_argument('FILE1', type=FileType('r'))
    parser.add_argument('FILE2', type=FileType('r'))
    # Optional Arguments
    parser.add_argument(
        "-1",
        help="suppress column 1 (lines unique to FILE1)",
        dest="one",
        action="store_true",
    )
    parser.add_argument(
        "-2",
        help="suppress column 2 (lines unique to FILE2)",
        dest="two",
        action="store_true",
    )
    parser.add_argument(
        "-3",
        help="suppress column 3 (lines that appear in both files)",
        dest="three",
        action="store_true",
    )
    parser.add_argument(
        "-u",
        help="compare unsorted inputs: keep FILE1's order and print "
             "lines unique to FILE2 last",
        dest="unsorted",
        action="store_true",
    )
    args = parser.parse_args(argv)

    try:
        file1 = lines(args.FILE1)
        file2 = lines(args.FILE2)
    finally:
        # FileType hands back sys.stdin for "-"; leave that one open.
        for handle in (args.FILE1, args.FILE2):
            if handle is not sys.stdin:
                handle.close()

    # Leading tabs per column; a suppressed column has no entry at all
    # and contributes no tab to the columns to its right.
    lead_two = '' if args.one else '\t'
    lead_three = lead_two + ('' if args.two else '\t')
    prefixes = {}
    if not args.one:
        prefixes[1] = ''
    if not args.two:
        prefixes[2] = lead_two
    if not args.three:
        prefixes[3] = lead_three
    if not prefixes:
        # -1 -2 -3 together: nothing can be printed.
        return

    classify = _classify_unsorted if args.unsorted else _classify_sorted
    sys.stdout.writelines(
        prefixes[column] + line + '\n'
        for column, line in classify(file1, file2)
        if column in prefixes
    )
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment