klprint/parse_10x_output.sh

## parse_10x_output.sh
#!/bin/bash

# Parse the 10x Chromium sparse matrix (matrix.mtx).
#
# The first column of every matrix entry (the gene index) is replaced with
# the ENSEMBL gene ID. If needed, the second column (the barcode index) can
# be replaced with the cell barcode as well.
#
# It needs the 10x Chromium outputs as follows:
#   1. genes.tsv     - gene ID in the first column, one gene per line
#   2. matrix.mtx    - the sparse matrix
#   3. barcodes.tsv  - one cell barcode per line (only for the optional step)
#
# How does it do that?
# awk first reads genes.tsv into a lookup table that maps each line number
# to the gene ID found on that line (and does the same for barcodes.tsv when
# it is supplied). It then goes through matrix.mtx, skips the header
# ("%" comment lines and the size line that follows them) and prints every
# entry with the first column looked up in the gene table.
#
# The output is saved in the file parsed_sparse.mtx

#######################################
# Print the entries of a 10x sparse matrix with indices replaced by names.
# Arguments:
#   $1 - genes file (default: genes.tsv)
#   $2 - matrix file (default: matrix.mtx)
#   $3 - optional barcodes file; when given, column 2 of every entry is
#        replaced with the barcode found on that line of the file
# Outputs:
#   One space-separated entry per line on stdout; diagnostics on stderr.
# Returns:
#   1 if an input file cannot be read, otherwise the exit status of awk.
#######################################
parse_10x_matrix() {
  local genes=${1:-genes.tsv}
  local matrix=${2:-matrix.mtx}
  local barcodes=${3:-}
  local path
  local -a files
  local -a inputs

  files=("$genes" "$matrix")
  if [[ -n "$barcodes" ]]; then
    files+=("$barcodes")
  fi

  # Fail early with a clear message instead of emitting a partial result.
  for path in "${files[@]}"; do
    if [[ ! -r "$path" ]]; then
      printf 'parse_10x_output: cannot read input file: %s\n' "$path" >&2
      return 1
    fi
  done

  # "stage=..." operands are awk variable assignments that take effect when
  # awk reaches them in the operand list, so each file is identified by the
  # stage set just before it (this stays correct even for an empty file,
  # unlike the NR == FNR idiom).
  inputs=(stage=genes "$genes")
  if [[ -n "$barcodes" ]]; then
    inputs+=(use_barcodes=1 stage=barcodes "$barcodes")
  fi
  inputs+=(stage=matrix "$matrix")

  # Header handling is done by content rather than by a fixed line count:
  # every "%" line is a comment, and the first remaining non-blank line is
  # the size line, which is not a matrix entry.
  awk '
    stage == "genes"    { gene[FNR] = $1; next }
    stage == "barcodes" { barcode[FNR] = $1; next }
    /^%/                { next }
    NF == 0             { next }
    !size_seen          { size_seen = 1; next }
    { print gene[$1], (use_barcodes ? barcode[$2] : $2), $3 }
  ' "${inputs[@]}"
}

# To also replace the second column with the cell barcode, append
# barcodes.tsv as a third argument to the call below.
parse_10x_matrix genes.tsv matrix.mtx > parsed_sparse.mtx
	#!/bin/bash

	# The following script parses the 10x Chromium sparse matrix.
	# It replaces the first column with the ENSEMBL gene ID and the second,
	# if needed, with the cell barcode (just uncomment the second awk script).

	# It needs the three 10x chromium outputs as follows:
	# 1. genes.tsv
	# 2. matrix.mtx
	# 3. barcodes.tsv

	# How does it do that?
	# The first awk generates a hashtable (h) which maps each line number of
	# the genes.tsv file to the gene ID found on that line.
	# Next, it goes through each line of the matrix.mtx file and replaces the first column
	# with the appropriate ENSEMBL gene ID.
	# If the second awk statement is uncommented, the same is done with the barcodes.tsv file,
	# replacing the second column in matrix.mtx with the cell barcode.

	# The output is saved in the file parsed_sparse.mtx

	# Replace column 1 of each matrix.mtx line with the gene ID stored on that
	# line number of genes.tsv, then drop the first three output lines
	# (assumed to be the matrix.mtx header lines) and save the rest.
	# The stages must be joined by real pipes: an escaped "\|" is handed to awk
	# as a file name and leaves tail reading the script's stdin instead.
	# A trailing "|" lets the pipeline continue on the next line, so the
	# commented-out stage below is simply skipped; uncomment it to also replace
	# column 2 with the cell barcode.
	awk 'NR == FNR {h[NR] = $1; next} {print h[$1],$2,$3}' genes.tsv matrix.mtx |
	# awk 'NR == FNR {h[NR] = $1; next} {print $1,h[$2],$3}' barcodes.tsv - |
	tail -n +4 \
	> parsed_sparse.mtx