Skip to content

Instantly share code, notes, and snippets.

@IsmailM
Last active September 22, 2022 16:25
Show Gist options
  • Save IsmailM/df2245597767d7749c3678086e711b8d to your computer and use it in GitHub Desktop.
Save IsmailM/df2245597767d7749c3678086e711b8d to your computer and use it in GitHub Desktop.
ChromImpute run on individual marks
#!/bin/bash
#
# Run the ChromImpute steps (Convert, ComputeGlobalDist, GenerateTrainData,
# Train, Apply) restricted to the histone marks named on the command line.
#
# Usage:
#   bash ./analysis.sh H3K27ac
#   bash ./analysis.sh H3K27ac H3K27me3 H3K36me3 H3K9me3
#
# Inputs, all relative to the current working directory:
#   marks.csv                      full sample/mark/file table
#   data/                          input directory handed to `chromimpute Convert`
#   hg38.chrom.sizes               chromosome sizes file
#   wgbs_data/                     DNA methylation data directory
#   wgbs_data/wgbs_data_files.tsv  DNA methylation file table
#   wgbs_data/header.tsv           DNA methylation header file
#
# Outputs go to alt/individual/<marks joined by "_">/:
#   marks.csv        rows of the input marks.csv that match the requested marks
#   marks_list.tsv   the requested marks on a single space-separated line
#   01_converted_data/ .. 05_apply_data/   one directory per pipeline step
set -euxo pipefail

# Split on whitespace exactly like the original unquoted expansion did, so a
# single quoted argument such as "H3K27ac H3K27me3" is still accepted, but
# without exposing the values to pathname expansion.
read -r -a MARKS <<< "$*"
if (( ${#MARKS[@]} == 0 )); then
  printf 'Usage: %s MARK [MARK...]\n' "${0##*/}" >&2
  exit 2
fi

# "${MARKS[*]}" joins on the first character of IFS; the subshell keeps the
# temporary IFS from leaking into the rest of the script.
PATTERN=$(IFS='|'; printf '%s' "${MARKS[*]}")     # grep -E alternation
DIRPATTERN=$(IFS='_'; printf '%s' "${MARKS[*]}")  # output directory suffix

OUT_DIR="alt/individual/${DIRPATTERN}"
CONVERTED_DIR="${OUT_DIR}/01_converted_data"
GLOBAL_DIST_DIR="${OUT_DIR}/02_global_dist"
TRAINDATA_DIR="${OUT_DIR}/03_traindata"
TRAINED_PREDICTORS_DIR="${OUT_DIR}/04_trained_predictors"
APPLY_DATA_DIR="${OUT_DIR}/05_apply_data"

# -p on every directory so that re-running for the same marks does not abort
# under `set -e` just because the directories already exist.
mkdir -p -- \
  "${OUT_DIR}" \
  "${CONVERTED_DIR}" \
  "${GLOBAL_DIST_DIR}" \
  "${TRAINDATA_DIR}" \
  "${TRAINED_PREDICTORS_DIR}" \
  "${APPLY_DATA_DIR}"

# Keep only the rows of marks.csv that mention one of the requested marks.
# NOTE: the pattern is an extended regex matched anywhere on the line, so a
# mark name that is a prefix of another mark also selects the longer one.
if ! grep -E -- "${PATTERN}" marks.csv > "${OUT_DIR}/marks.csv"; then
  printf 'error: no rows in marks.csv match: %s\n' "${MARKS[*]}" >&2
  exit 1
fi
printf '%s\n' "${MARKS[*]}" > "${OUT_DIR}/marks_list.tsv"

# Convert the DATA first...
for MARK in "${MARKS[@]}"; do
  chromimpute Convert -m "${MARK}" data "${OUT_DIR}/marks.csv" \
    hg38.chrom.sizes "${CONVERTED_DIR}"
done

# ComputeGlobalDist, once per mark, on the converted data
for MARK in "${MARKS[@]}"; do
  chromimpute ComputeGlobalDist -m "${MARK}" "${CONVERTED_DIR}" \
    "${OUT_DIR}/marks.csv" hg38.chrom.sizes "${GLOBAL_DIST_DIR}"
done

# GenerateTrainData
chromimpute GenerateTrainData \
  -dnamethyl wgbs_data/wgbs_data_files.tsv wgbs_data wgbs_data/header.tsv \
  -d 100 "${CONVERTED_DIR}" "${GLOBAL_DIST_DIR}" "${OUT_DIR}/marks.csv" \
  hg38.chrom.sizes "${TRAINDATA_DIR}" dna_methyl

# Sample IDs are hard-coded; presumably they must match the first column of
# marks.csv -- TODO confirm if the sample set ever changes.
SAMPLES=(D1 D2 D3 D4 D5 D6 D7 D8)

# Train
for sample in "${SAMPLES[@]}"; do
  chromimpute Train -dnamethyl wgbs_data/header.tsv "${TRAINDATA_DIR}" \
    "${OUT_DIR}/marks.csv" "${TRAINED_PREDICTORS_DIR}" "${sample}" dna_methyl
done

# Apply
for sample in "${SAMPLES[@]}"; do
  chromimpute Apply \
    -dnamethyl wgbs_data/wgbs_data_files.tsv wgbs_data wgbs_data/header.tsv \
    "${CONVERTED_DIR}" "${GLOBAL_DIST_DIR}" "${TRAINED_PREDICTORS_DIR}" \
    "${OUT_DIR}/marks.csv" hg38.chrom.sizes "${APPLY_DATA_DIR}" \
    "${sample}" dna_methyl
done
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
# Example marks.csv generated by the above script when run as: bash ./analysis.sh H3K27ac
D1 H3K27ac IHECRE00000101.3.31b03180-586e-4b42-aa9e-2a124599ee9d.pval0.01.500K.narrowPeak.gz
D2 H3K27ac IHECRE00000027.3.97de962a-0cae-4248-ab76-3d3fe1777a34.pval0.01.500K.narrowPeak.gz
D3 H3K27ac IHECRE00000048.3.1896e4ef-d88b-439f-9ac2-0cee1d66c3f3.pval0.01.500K.narrowPeak.gz
D4 H3K27ac IHECRE00000155.3.8279b76b-57ad-4ec8-b5cb-d16fdac7512c.pval0.01.500K.narrowPeak.gz
D5 H3K27ac IHECRE00000774.3.0226d0bd-e448-4e8e-922e-bd3e7a7abd00.pval0.01.500K.narrowPeak.gz
D6 H3K27ac IHECRE00000866.3.0925b7f7-7c86-40ca-bdc0-1ca853709a23.pval0.01.500K.narrowPeak.gz
D7 H3K27ac IHECRE00000718.3.4124a800-a44d-4bfe-b76a-06de52b79d79.pval0.01.500K.narrowPeak.gz
D8 H3K27ac IHECRE00000828.1.45bbe382-78c1-4fac-99df-1349446e9df6.pval0.01.500K.narrowPeak.gz
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
# Original marks.csv, which the above script filters to create a new marks.csv (see the example above) for the marks requested in each run
D1 H3K27ac IHECRE00000101.3.31b03180-586e-4b42-aa9e-2a124599ee9d.pval0.01.500K.narrowPeak.gz
D1 H3K27me3 IHECRE00000101.3.15b5016e-771a-4791-960d-6d40861428f9.pval0.01.500K.narrowPeak.gz
D1 H3K36me3 IHECRE00000101.3.06d0440d-166a-4bfb-8784-29eb25ba34d7.pval0.01.500K.narrowPeak.gz
D1 H3K4me1 IHECRE00000101.3.16a3040f-b17c-4d67-9177-3d83db7f277e.pval0.01.500K.narrowPeak.gz
D1 H3K4me3 IHECRE00000101.3.7aca552f-d7f4-4773-8438-94768d4b35a1.pval0.01.500K.narrowPeak.gz
D1 H3K9me3 IHECRE00000101.3.728f2424-573a-4743-bf7a-a87d17545a80.pval0.01.500K.narrowPeak.gz
D2 H3K27ac IHECRE00000027.3.97de962a-0cae-4248-ab76-3d3fe1777a34.pval0.01.500K.narrowPeak.gz
D2 H3K27me3 IHECRE00000027.3.0f1ed0f5-f1e0-4d74-b3bf-fb3d751dc985.pval0.01.500K.narrowPeak.gz
D2 H3K36me3 IHECRE00000027.3.69150add-aea1-4da8-8056-9d84554477f4.pval0.01.500K.narrowPeak.gz
D2 H3K4me1 IHECRE00000027.3.5192d960-f7e6-4c19-807d-86c5f08ffc22.pval0.01.500K.narrowPeak.gz
D2 H3K4me3 IHECRE00000027.3.93c45466-c37d-4584-ac62-84daaf3ab1fa.pval0.01.500K.narrowPeak.gz
D2 H3K9me3 IHECRE00000027.3.ee84a798-d432-4ddb-a16e-2e66142b00aa.pval0.01.500K.narrowPeak.gz
D3 H3K27ac IHECRE00000048.3.1896e4ef-d88b-439f-9ac2-0cee1d66c3f3.pval0.01.500K.narrowPeak.gz
D3 H3K27me3 IHECRE00000048.3.23af1bd1-f0c5-453e-8760-bf047a842cd8.pval0.01.500K.narrowPeak.gz
D3 H3K36me3 IHECRE00000048.3.a2079e84-2427-4d94-b528-ea4d0be6a729.pval0.01.500K.narrowPeak.gz
D3 H3K4me1 IHECRE00000048.3.5644ae9c-281d-4590-b1f4-0e20de6845e6.pval0.01.500K.narrowPeak.gz
D3 H3K4me3 IHECRE00000048.3.8c570f10-9831-4857-a68d-6de7246feef0.pval0.01.500K.narrowPeak.gz
D3 H3K9me3 IHECRE00000048.3.11a776de-361e-4edd-b4c1-1153e9a72498.pval0.01.500K.narrowPeak.gz
D4 H3K27ac IHECRE00000155.3.8279b76b-57ad-4ec8-b5cb-d16fdac7512c.pval0.01.500K.narrowPeak.gz
D4 H3K27me3 IHECRE00000155.3.65fa8cc1-284a-4261-8b82-5bca18ab1a7b.pval0.01.500K.narrowPeak.gz
D4 H3K36me3 IHECRE00000155.3.f96e1b9b-e27a-457e-8b0f-804f1e61ec6e.pval0.01.500K.narrowPeak.gz
D4 H3K4me1 IHECRE00000155.3.856b1d64-01ad-4d8f-893b-b027b47318ee.pval0.01.500K.narrowPeak.gz
D4 H3K4me3 IHECRE00000155.3.8aad1d9d-d4ae-4ccf-a70a-5db7693e9703.pval0.01.500K.narrowPeak.gz
D4 H3K9me3 IHECRE00000155.3.2e98c95e-7e9b-4407-af30-61730f99e499.pval0.01.500K.narrowPeak.gz
D5 H3K27ac IHECRE00000774.3.0226d0bd-e448-4e8e-922e-bd3e7a7abd00.pval0.01.500K.narrowPeak.gz
D5 H3K27me3 IHECRE00000774.3.2555c044-a169-434c-aa5a-cdb2619e5c02.pval0.01.500K.narrowPeak.gz
D5 H3K36me3 IHECRE00000774.3.c74b9473-7abb-43f6-8885-3c40e0552ac6.pval0.01.500K.narrowPeak.gz
D5 H3K4me1 IHECRE00000774.3.ec5f998a-c189-48df-8b3d-327b2930af23.pval0.01.500K.narrowPeak.gz
D5 H3K4me3 IHECRE00000774.3.22c5c812-e487-45a1-b100-255b7ac328af.pval0.01.500K.narrowPeak.gz
D5 H3K9me3 IHECRE00000774.3.01056fa5-2189-424c-aa80-7250e2ec9dc9.pval0.01.500K.narrowPeak.gz
D6 H3K27ac IHECRE00000866.3.0925b7f7-7c86-40ca-bdc0-1ca853709a23.pval0.01.500K.narrowPeak.gz
D6 H3K27me3 IHECRE00000866.3.1ccf14fb-5a5a-4c74-8157-04b7e1f2dcf1.pval0.01.500K.narrowPeak.gz
D6 H3K36me3 IHECRE00000866.3.8ab33063-999a-4988-84dd-a334b327781d.pval0.01.500K.narrowPeak.gz
D6 H3K4me1 IHECRE00000866.3.667978be-7c54-441e-83d2-7798acad8e7b.pval0.01.500K.narrowPeak.gz
D6 H3K4me3 IHECRE00000866.3.2a076e8e-9a2f-43b5-99c3-eddd13655e6b.pval0.01.500K.narrowPeak.gz
D6 H3K9me3 IHECRE00000866.3.0bf1ced9-73d3-4bb4-80e8-08f993f2b136.pval0.01.500K.narrowPeak.gz
D7 H3K27ac IHECRE00000718.3.4124a800-a44d-4bfe-b76a-06de52b79d79.pval0.01.500K.narrowPeak.gz
D7 H3K27me3 IHECRE00000718.3.5e75e9ad-e9d2-4508-9566-bf664ca87cc0.pval0.01.500K.narrowPeak.gz
D7 H3K36me3 IHECRE00000718.3.3d668648-a4ff-43ad-ae6f-bcf86ce49985.pval0.01.500K.narrowPeak.gz
D7 H3K4me1 IHECRE00000718.3.3cf6cab8-5a06-44c2-b745-b249948e89e2.pval0.01.500K.narrowPeak.gz
D7 H3K4me3 IHECRE00000718.3.9c867cb2-87c1-446d-a7eb-1986e48fd4c0.pval0.01.500K.narrowPeak.gz
D7 H3K9me3 IHECRE00000718.3.aa15ac7e-15a0-48e8-acc8-ec3bdca62dd4.pval0.01.500K.narrowPeak.gz
D8 H3K27ac IHECRE00000828.1.45bbe382-78c1-4fac-99df-1349446e9df6.pval0.01.500K.narrowPeak.gz
D8 H3K27me3 IHECRE00000828.1.30590c4c-78e4-4da6-9844-49fd785aa3ea.pval0.01.500K.narrowPeak.gz
D8 H3K36me3 IHECRE00000828.1.ad8aa122-d0d8-45b6-96f2-58aee53acfb7.pval0.01.500K.narrowPeak.gz
D8 H3K4me1 IHECRE00000828.1.cd2006fa-f12a-47e4-a8de-2ef03985dfb6.pval0.01.500K.narrowPeak.gz
D8 H3K4me3 IHECRE00000828.1.3dc2375f-9530-437e-9fe7-54e01b6a99a6.pval0.01.500K.narrowPeak.gz
D8 H3K9me3 IHECRE00000828.1.aa54d00c-3307-444e-b6a2-8d8127554fe8.pval0.01.500K.narrowPeak.gz
chr1 chr-1.tsv.gz
chr2 chr-2.tsv.gz
chr3 chr-3.tsv.gz
chr4 chr-4.tsv.gz
chr5 chr-5.tsv.gz
chr6 chr-6.tsv.gz
chr7 chr-7.tsv.gz
chr8 chr-8.tsv.gz
chr9 chr-9.tsv.gz
chr10 chr-10.tsv.gz
chr11 chr-11.tsv.gz
chr12 chr-12.tsv.gz
chr13 chr-13.tsv.gz
chr14 chr-14.tsv.gz
chr15 chr-15.tsv.gz
chr16 chr-16.tsv.gz
chr17 chr-17.tsv.gz
chr18 chr-18.tsv.gz
chr19 chr-19.tsv.gz
chr20 chr-20.tsv.gz
chr21 chr-21.tsv.gz
chr22 chr-22.tsv.gz
chrX chr-X.tsv.gz
chrY chr-Y.tsv.gz
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment