Tabea-K/nr_of_common_lines_per_column.sh

## nr_of_common_lines_per_column.sh
#!/usr/bin/env bash
# Compares one column of two tab-separated files and reports how many distinct
# values occur only in the first file, only in the second file, and in both.
# For example, you can compare the IDs listed in two tables.
#
# Usage: nr_of_common_lines_per_column.sh COLUMN FILE1 FILE2
#   COLUMN  field selector handed to `cut -f` (e.g. 1). Fields are split on
#           tabs, which is cut's default delimiter.
#   FILE1   first input file
#   FILE2   second input file
#
# Exit status: 0 on success, 2 on wrong usage, 1 if any step fails.

# The body runs in a subshell "( ... )" so that the shell options, the locale
# override and the EXIT trap below never leak into the calling shell.
main() (
  set -euo pipefail

  if (( $# != 3 )); then
    printf 'Usage: %s COLUMN FILE1 FILE2\n' "${0##*/}" >&2
    exit 2
  fi

  local column=$1
  local file1=$2
  local file2=$3

  # comm needs both inputs sorted with the same collation it uses itself;
  # the C locale makes that byte-wise and reproducible.
  export LC_ALL=C

  # Private scratch directory instead of fixed dot-files in the working
  # directory; removed again on every exit path of the subshell.
  local tmpdir
  tmpdir=$(mktemp -d) || exit 1
  trap 'rm -rf -- "$tmpdir"' EXIT

  # sort -u: compare the columns as sets, so a value repeated within one
  # file is counted once and never reported as "only in" a file when the
  # other file contains it too.
  cut -f "$column" -- "$file1" | sort -u > "$tmpdir/values1" || exit 1
  cut -f "$column" -- "$file2" | sort -u > "$tmpdir/values2" || exit 1

  # With no options, comm prints three tab-indented columns (unique to the
  # first file, unique to the second file, common to both). Suppressing two
  # of them (-23, -13, -12) leaves exactly the lines to count; slicing the
  # combined output with cut would also count the empty cells of the other
  # columns.
  local only_in_file1 only_in_file2 in_both
  only_in_file1=$(comm -23 "$tmpdir/values1" "$tmpdir/values2" \
    | awk 'END { print NR }') || exit 1
  only_in_file2=$(comm -13 "$tmpdir/values1" "$tmpdir/values2" \
    | awk 'END { print NR }') || exit 1
  in_both=$(comm -12 "$tmpdir/values1" "$tmpdir/values2" \
    | awk 'END { print NR }') || exit 1

  printf 'There are %s lines that are only found in %s\n' \
    "$only_in_file1" "$file1"
  printf 'There are %s lines that are only found in %s\n' \
    "$only_in_file2" "$file2"
  printf 'There are %s lines found in both files\n' "$in_both"
)

main "$@"
	#!/usr/bin/env bash
	# Compares one column of two tab-separated files and reports how many
	# distinct values occur only in the first file, only in the second file,
	# and in both. For example, you can compare the IDs listed in two tables.
	#
	# Usage: nr_of_common_lines_per_column.sh COLUMN FILE1 FILE2
	#   COLUMN  field selector handed to `cut -f` (e.g. 1). Fields are split
	#           on tabs, which is cut's default delimiter.
	#   FILE1   first input file
	#   FILE2   second input file
	#
	# Exit status: 0 on success, 2 on wrong usage, 1 if any step fails.

	# The body runs in a subshell "( ... )" so that the shell options, the
	# locale override and the EXIT trap below never leak into the caller.
	main() (
		set -euo pipefail

		if (( $# != 3 )); then
			printf 'Usage: %s COLUMN FILE1 FILE2\n' "${0##*/}" >&2
			exit 2
		fi

		local column=$1
		local file1=$2
		local file2=$3

		# comm needs both inputs sorted with the same collation it uses
		# itself; the C locale makes that byte-wise and reproducible.
		export LC_ALL=C

		# Private scratch directory instead of fixed dot-files in the
		# working directory; removed on every exit path of the subshell.
		local tmpdir
		tmpdir=$(mktemp -d) || exit 1
		trap 'rm -rf -- "$tmpdir"' EXIT

		# Real pipes are required here: an escaped "\|" is an ordinary
		# argument, so cut would be asked to read files named "|" and "sort".
		# sort -u compares the columns as sets of distinct values.
		cut -f "$column" -- "$file1" | sort -u > "$tmpdir/values1" || exit 1
		cut -f "$column" -- "$file2" | sort -u > "$tmpdir/values2" || exit 1

		# With no options, comm prints three tab-indented columns (unique to
		# the first file, unique to the second file, common to both).
		# Suppressing two of them (-23, -13, -12) leaves exactly the lines to
		# count; slicing the combined output with cut would also count the
		# empty cells of the other columns.
		local only_in_file1 only_in_file2 in_both
		only_in_file1=$(comm -23 "$tmpdir/values1" "$tmpdir/values2" \
			| awk 'END { print NR }') || exit 1
		only_in_file2=$(comm -13 "$tmpdir/values1" "$tmpdir/values2" \
			| awk 'END { print NR }') || exit 1
		in_both=$(comm -12 "$tmpdir/values1" "$tmpdir/values2" \
			| awk 'END { print NR }') || exit 1

		printf 'There are %s lines that are only found in %s\n' \
			"$only_in_file1" "$file1"
		printf 'There are %s lines that are only found in %s\n' \
			"$only_in_file2" "$file2"
		printf 'There are %s lines found in both files\n' "$in_both"
	)

	main "$@"