arq5x/gist:8d9c2767d2495ba4b9bf6f555d29c088

## gistfile1.txt
# make sure all rows have 99 fields
$ awk 'BEGIN{FS="\t"} {print NF}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | uniq
99

# how many HET (1) and HOM_ALT (3) genotypes were there?
$ awk 'BEGIN{FS="\t"} {print $99}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | sort | uniq -c

# get rid of headers except for the first one
(head -n 1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt; grep -v gt_types /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt)

# make sure there are no LOW entries (note grep -w for specificity)
(head -n 1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt; grep -v gt_types /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt) | grep -w LOW

# what col number is the gene column?
head -1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | tr "\t" "\n" | cat -n | grep gene
20	gene

# print, in descending order, the number of variants observed per gene
awk '{print $20}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | sort | uniq -c | awk '{print $2"\t"$1}' | sort -k2,2nr | head -n 50
MUC16	25126
MUC6	20998
AHNAK2	8647
TTN	8244
MUC5B	6907
MUC19	6872
FLG	6681
NBPF10	6375
ANKRD36	5687
HRNR	5576
NBPF1	5472
FAM230A	4745
PABPC3	4733
FAM182B	4497
FRG1B	4409
PDE4DIP	4334
TAS2R31	4208
IGFN1	4176
FCGBP	3925
OBSCN	3869
MUC2	3636
AHNAK	3621
TAS2R43	3586
SERPINA1	3432
CTD-3088G3.8	3371
TAS2R19	3115
ANKRD36C	3096
MKI67	3073
ANKRD30B	2986
TAS2R46	2905
NBPF14	2796
GOLGA6L2	2717
NEB	2652
PRAMEF1	2621
TCHH	2564
FLG2	2548
KRT18	2455
ZNF257	2412
NBPF12	2374
PLIN4	2371
NBPF9	2349
FAM186A	2348
PRAMEF2	2345
PRAMEF4	2251
OR4A16	2247
GXYLT1	2228
CNN2	2200
FSIP2	2194
RP11-683L23.1	2173
LILRA6	2137
	# make sure all rows have 99 fields
	$ awk 'BEGIN{FS="\t"} {print NF}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | uniq
	99

	# how many HET (1) and HOM_ALT (3) genotypes were there?
	$ awk 'BEGIN{FS="\t"} {print $99}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | sort | uniq -c

	# get rid of headers except for the first one
	(head -n 1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt; grep -v gt_types /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt)

	# make sure there are no LOW entries (note grep -w for specificity)
	(head -n 1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt; grep -v gt_types /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt) | grep -w LOW

	# what col number is the gene column?
	head -1 /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | tr "\t" "\n" | cat -n | grep gene
	20 gene

	# print, in descending order, the number of variants observed per gene
	awk '{print $20}' /uufs/chpc.utah.edu/common/home/u1072926/gemini_queries/all.txt | sort | uniq -c | awk '{print $2"\t"$1}' | sort -k2,2nr | head -n 50
	MUC16 25126
	MUC6 20998
	AHNAK2 8647
	TTN 8244
	MUC5B 6907
	MUC19 6872
	FLG 6681
	NBPF10 6375
	ANKRD36 5687
	HRNR 5576
	NBPF1 5472
	FAM230A 4745
	PABPC3 4733
	FAM182B 4497
	FRG1B 4409
	PDE4DIP 4334
	TAS2R31 4208
	IGFN1 4176
	FCGBP 3925
	OBSCN 3869
	MUC2 3636
	AHNAK 3621
	TAS2R43 3586
	SERPINA1 3432
	CTD-3088G3.8 3371
	TAS2R19 3115
	ANKRD36C 3096
	MKI67 3073
	ANKRD30B 2986
	TAS2R46 2905
	NBPF14 2796
	GOLGA6L2 2717
	NEB 2652
	PRAMEF1 2621
	TCHH 2564
	FLG2 2548
	KRT18 2455
	ZNF257 2412
	NBPF12 2374
	PLIN4 2371
	NBPF9 2349
	FAM186A 2348
	PRAMEF2 2345
	PRAMEF4 2251
	OR4A16 2247
	GXYLT1 2228
	CNN2 2200
	FSIP2 2194
	RP11-683L23.1 2173
	LILRA6 2137