Skip to content

Instantly share code, notes, and snippets.

@olegs
Created January 12, 2024 11:07
Show Gist options
  • Save olegs/04022f5c35ef31af307087b15264e8ac to your computer and use it in GitHub Desktop.
Save olegs/04022f5c35ef31af307087b15264e8ac to your computer and use it in GitHub Desktop.
Rapid unleashing of macrophage efferocytic capacity via transcriptional pause/release (commands.sh)
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# ATAC-seq & pro-seq & rna-seq analysis
### Analysis of all ATAC-seq datasets
```
# chipseq-smk-pipeline
for FDR in 0.05; do
snakemake all --use-conda --cores all --directory /mnt/stripe/shpynov/2022_Atac-Seq-2/ --config fastq_ext=fastq.gz fastq_dir=/mnt/stripe/shpynov/2022_Atac-Seq-2/fastq genome=mm10 bowtie2_params="-X 2000 --dovetail" macs2_mode=narrow macs2_params="-q $FDR -f BAMPE --nomodel --nolambda -B --call-summits" macs2_suffix="q$FDR";
done
```
### Reads with additional adapters trimming Trimmomatic
```
# Check trimming with trimmomatic
cd 2022_Atac-Seq-2/fastq
for F in *_R1_*; do
echo $F; N=${F/_R1_.fastq.gz/}; PF=${N}_R2_.fastq.gz; echo $PF;
trimmomatic PE -threads 8 -trimlog $N.txt -phred33 $F $PF ../trimmed/$F ../trimmed/${N}_unpaired_R1_.fastq.gz ../trimmed/$PF ../trimmed/${N}_unpaired_R2_.fastq.gz ILLUMINACLIP:/home/user/miniconda3/envs/bio/share/trimmomatic/adapters/NexteraPE-PE.fa:2:30:10:8:true;
done
```
### And reprocess all data
```
for FDR in 0.05; do snakemake all --use-conda --cores all --directory /mnt/stripe/shpynov/2022_Atac-Seq-2/ --config fastq_ext=fastq.gz fastq_dir=/mnt/stripe/shpynov/2022_Atac-Seq-2/trimmed genome=mm10 bowtie2_params="-X 2000 --dovetail" macs2_mode=narrow macs2_params="-q $FDR -f BAMPE --nomodel --nolambda -B --call-summits" macs2_suffix="q$FDR" -n; done
```
### Check non-aligned reads vs hg38
```
cd /mnt/stripe/shpynov/2022_Atac-Seq-2/bams
for F in *.bam; do
echo $F; N=${F/.bam/};
echo "Filtering"
# Extract unmapped read whose mate is mapped
samtools view -u -f 4 -F 264 $F > ../unmapped/${N}_unmapped_1.bam;
# Extract mapped read whose mate is unmapped
samtools view -u -f 8 -F 260 $F > ../unmapped/${N}_unmapped_2.bam;
# Extract unmapped read with unmapped mate
samtools view -u -f 12 -F 256 $F > ../unmapped/${N}_unmapped_3.bam;
echo "Merging"
samtools merge ../unmapped/${N}_unmapped.bam ../unmapped/${N}_unmapped_*.bam;
samtools sort -n -o ../unmapped/${N}_unmapped_sorted.bam ../unmapped/${N}_unmapped.bam;
echo "Bam to fastq"
bedtools bamtofastq -i ../unmapped/${N}_unmapped_sorted.bam -fq ../unmapped/${N}_unmapped.fastq;
rm ../unmapped/${N}_unmapped_*.bam;
done
```
### Subsample all libraries to 20mln
```
# Move previous to _full suffix
READS=20000000
for F in *.bam; do
echo $F
FRACTION=$(samtools idxstats $F | cut -f3 | awk -v ct=$READS 'BEGIN {total=0} {total += $1} END {print ct/total}')
echo $FRACTION
samtools view -@ 4 -b -s ${FRACTION} $F > ${F/.bam/_20mln.bam}
done
# Visualization
for F in *.bam; do echo $F; bamCoverage -b $F -p 6 -o ${F/.bam/.bw}; done
# MACS2 peak calling
for F in *.bam; do echo $F; macs2 callpeak -t $F -g mm -q 0.05 -f BAMPE --nomodel --nolambda -B --call-summits -n ${F/.bam/_q0.05} &> | tee ${F/.bam/_macs2.log};
done
# SPAN peak calling
for F in *.bam; do echo $F; java -jar ~/span-1.0.build.jar analyze -t $F -cs mm10.chrom.sizes --fdr 0.05 -peaks ${F/.bam/_q0.05.peak} &> ${F/.bam/_span.log};
done
```
### Compute consensus peaks for all the groups
```
# IMPORTANT: Call bedtools merge after multiiter!
cd 2022_Atac-Seq-2/macs2
T=$(mktemp);
for F in Alone 15min 30min 45min; do echo $F; bedtools multiinter -i $(ls *$F*.narrowPeak) | grep -E '.*(.*,){2,}.*' | awk -v OFS='\t' '{print $1,$2,$3}' > $T; bedtools merge -i $T > ${F}_macs2_cons3.bed3;
done
cd 2022_Atac-Seq-2/span
for F in Alone 15min 30min 45min; do echo $F; bedtools multiinter -i $(ls *$F*.peak) | grep -E '.*(.*,){2,}.*' | awk -v OFS='\t' '{print $1,$2,$3}' > $T; bedtools merge -i $T > ${F}_span_cons3.bed3;
done
```
### R downstream analysis of consensus peaks
```
library(ChIPpeakAnno)
m15 = toGRanges("/home/oshpynov/data/2022_Atac-Seq-2/macs2_20mln/15min_macs2_cons3.bed3", format="BED", header=FALSE)
m30 = toGRanges("/home/oshpynov/data/2022_Atac-Seq-2/macs2_20mln/30min_macs2_cons3.bed3", format="BED", header=FALSE)
m45 = toGRanges("/home/oshpynov/data/2022_Atac-Seq-2/macs2_20mln//45min_macs2_cons3.bed3", format="BED", header=FALSE)
ma = toGRanges("/home/oshpynov/data/2022_Atac-Seq-2/macs2_20mln//Alone_macs2_cons3.bed3", format="BED", header=FALSE)
ol <- findOverlapsOfPeaks(m15, m30, m45, ma)
png('/home/oshpynov/data/2022_Atac-Seq-2/venn.png')
makeVennDiagram(ol)
dev.off()
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
peaks <- GRangesList(m15=m15, m30=m30, m45=m45, ma=ma)
png('/home/oshpynov/data/2022_Atac-Seq-2/distribution.png', width=6, height=3, units="in", res=1200, pointsize = 1)
genomicElementDistribution(peaks, TxDb = TxDb.Mmusculus.UCSC.mm10.knownGene, promoterRegion=c(upstream=2000, downstream=500), geneDownstream=c(upstream=0, downstream=2000))
dev.off()
overlaps <- ol$peaklist[['m15///m30///m45///ma']]
library(EnsDb.Mmusculus.v79)
annoData <- toGRanges(EnsDb.Mmusculus.v79, feature="gene")
png('/home/oshpynov/data/2022_Atac-Seq-2/anno.png', width=5, height=3, units="in", res=1200, pointsize = 1)
binOverFeature(overlaps, annotationData=annoData,radius=5000, nbins=20, FUN=length, errFun=0, xlab="distance from TSS (bp)", ylab="count", main="Distribution of aggregated peak numbers around TSS")
dev.off()
overlaps.anno <- annotatePeakInBatch(overlaps, AnnotationData=annoData, output="nearestBiDirectionalPromoters", bindingRegion=c(-2000, 500))
library(org.Mm.eg.db)
overlaps.anno <- addGeneIDs(overlaps.anno, "org.Mm.eg.db", IDs2Add = "entrez_id")
write.csv(as.data.frame(unname(overlaps.anno)), "anno.csv")
over <- getEnrichedGO(overlaps.anno, orgAnn="org.Mm.eg.db", condense=TRUE)
png('/home/oshpynov/data/2022_Atac-Seq-2/go.png', width=10, height=4, units="in", res=1200, pointsize = 1)
enrichmentPlot(over)
dev.off()
```
### Diffbind analysis
```
# Create DiffBind config
cd /mnt/stripe/shpynov/2022_Atac-Seq-2/diffbind
echo "SampleID,Tissue,Factor,Condition,Replicate,bamReads,ControlID,bamControl,Peaks,PeakCaller" > config.csv
for F in Mac-15min Mac-30min_AC Mac-45min_AC MacAlone; do for R in 1 2 3 4; do SAMPLE="$F-$R"; BAMREADS=$(find ~/data/2022_Atac-Seq-2/bams_20mln -name "$F*-${R}_20mln.bam"); PEAKS=$(find ~/data/2022_Atac-Seq-2/macs2_20mln -name "$F*-$R*.narrowPeak"); echo "$SAMPLE,,Type,$F,$R,$BAMREADS,,,$PEAKS,bed" >> config.csv; done; done
# R code
library(DiffBind)
setwd("~/data/2022_Atac-Seq-2/diff_alone_vs_45/")
samples = dba(sampleSheet = 'config.csv')
sample_counts = dba.count(samples)
png('counts.png')
plot(sample_counts)
dev.off()
sample_contrast = dba.contrast(sample_counts)
sample_contrast = dba.analyze(sample_contrast)
png('overlap.png')
overlap_rate = dba.overlap(sample_contrast, mode = DBA_OLAP_RATE)
plot(overlap_rate, type = "b", ylab = "# peaks", xlab = "Overlap at least this many peaksets")
dev.off()
png('pca.png')
dba.plotPCA(sample_contrast)
dev.off()
png('ma.png')
dba.plotMA(sample_contrast)
dev.off()
png('volcano.png')
dba.plotVolcano(sample_contrast)
dev.off()
sample_diff = dba.report(sample_contrast)
write.table(sample_diff, 'diff_alone_vs_45.csv', sep = ",", row.names = FALSE)
# BASH csv to bed3
cat diff_alone_vs_45.csv | grep -v 'start' | sed 's/"//g' | awk -v FS=',' -v OFS='\t' '{print $1,$2,$3}' | sort -k1,1 -k2,2n > diff_alone_vs_45.bed3
```
### Analysis of differential peaks
```
# Top 1000 most significant differential peaks
cat alone_vs_45.csv | grep -v 'start' | head -n 1000 | sed 's/"//g' | awk -v FS=',' -v OFS='\t' '{print $1,$2,$3}' | sort -k1,1 -k2,2n > alone_vs_45_top1000.bed3
# Motif enrichment analysis
perl ~/miniconda3/envs/bio/share/homer/bin/findMotifsGenome.pl alone_vs_45_top1000.bed3 mm10 alone_vs_45_top1000_motifs -size 1000 -mask;
# Functional annotation analysis with ChIPSeeker
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
peakAnno <- annotatePeak(peaks, tssRegion=c(-3000, -3000), TxDb=txdb, annoDb="org.Mm.eg.db")
png('/home/oshpynov/data/2022_Atac-Seq-2/diff/peakanno.png', width=5, height=2, units="in", res=1200, pointsize = 1)
plotAnnoBar(peakAnno)
dev.off()
png('/home/oshpynov/data/2022_Atac-Seq-2/diff/tss.png', width=10, height=4, units="in", res=1200, pointsize = 1)
plotDistToTSS(peakAnno, title="Distribution relative to TSS")
dev.off()
# Functional annotation analysis with ChIPPeakAnno
library(ChIPpeakAnno)
peaks = toGRanges("/home/oshpynov/data/2022_Atac-Seq-2/diff/alone_vs_45.bed3", format="BED", header=FALSE)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
png('/home/oshpynov/data/2022_Atac-Seq-2/diff/distribution.png', width=6, height=3, units="in", res=1200, pointsize = 1)
genomicElementDistribution(peaks, TxDb = TxDb.Mmusculus.UCSC.mm10.knownGene, promoterRegion=c(upstream=2000, downstream=500), geneDownstream=c(upstream=0, downstream=2000))
dev.off()
library(EnsDb.Mmusculus.v79)
annoData <- toGRanges(EnsDb.Mmusculus.v79, feature="gene")
png('/home/oshpynov/data/2022_Atac-Seq-2/diff/anno.png')
binOverFeature(peaks, annotationData=annoData,radius=5000, nbins=20, FUN=length, errFun=0, xlab="distance from TSS (bp)", ylab="count", main="Distribution of peaks around TSS")
dev.off()
library(org.Mm.eg.db)
peaks.anno <- annotatePeakInBatch(peaks, AnnotationData=annoData, output="nearestBiDirectionalPromoters", bindingRegion=c(-2000, 500))
peaks.anno <- addGeneIDs(peaks.anno, "org.Mm.eg.db", IDs2Add = "entrez_id")
write.csv(as.data.frame(unname(peaks.anno)), "anno.csv")
over <- getEnrichedGO(peaks.anno, orgAnn="org.Mm.eg.db", condense=TRUE)
png('/home/oshpynov/data/2022_Atac-Seq-2/diff/go.png', width=10, height=4, units="in", res=1200, pointsize = 1)
enrichmentPlot(over)
dev.off()
```
### Motif enrichment with background
```
# Motif enrichment analysis with background
perl ~/miniconda3/envs/bio/share/homer/bin/findMotifsGenome.pl alone_vs_45_top1000.bed3 mm10 alone_vs_45_top1000_motifs_vs_background -size 1000 -mask -bg alone_vs_45_background.bed3;
```
### Prepare background
```
# Prepare all peaks
T=$(mktemp)
bedtools multiinter -i *.narrowPeak | awk -v OFS='\t' '{print $1,$2,$3}' > $T;
bedtools merge -i $T > all_peaks.bed3
# Prepare background as (all_peaks - peaks) + peaks
bedtools subtract -A -a ../macs2_20mln/all_peaks.bed3 -b alone_vs_45.bed3 > $T;
bedtools multiinter -i $T alone_vs_45.bed3 | sort -k1,1 -k2,2n > $T.2;
bedtools merge -i $T.2 > alone_vs_45_background.bed3;
```
### Diffbind pairwise analysis
```
# R code
library(DiffBind)
# Use one of these ...
setwd("~/data/2022_Atac-Seq-2/diff_alone_vs_15/")
setwd("~/data/2022_Atac-Seq-2/diff_alone_vs_30/")
setwd("~/data/2022_Atac-Seq-2/diff_15_vs_45/")
samples = dba(sampleSheet = 'config.csv')
sample_counts = dba.count(samples)
peaksets <- dba.peakset(sample_counts, bRetrieve = TRUE)
png('counts.png')
plot(sample_counts)
dev.off()
sample_contrast = dba.contrast(sample_counts)
sample_contrast = dba.analyze(sample_contrast)
png('overlap.png')
overlap_rate = dba.overlap(sample_contrast, mode = DBA_OLAP_RATE)
plot(overlap_rate, type = "b", ylab = "# peaks", xlab = "Overlap at least this many peaksets")
dev.off()
png('pca.png')
dba.plotPCA(sample_contrast)
dev.off()
png('ma.png')
dba.plotMA(sample_contrast)
dev.off()
png('volcano.png')
dba.plotVolcano(sample_contrast)
dev.off()
sample_diff = dba.report(sample_contrast)
write.table(sample_diff, 'diff_alone_vs_15.csv', sep = ",", row.names = FALSE)
# BASH csv to bed3
cat diff_alone_vs_15.csv | grep -v 'start' | sed 's/"//g' | awk -v FS=',' -v OFS='\t' '{print $1,$2,$3}' | sort -k1,1 -k2,2n > diff_alone_vs_15.bed3
# BASH csv to bed3
cat diff_alone_vs_30.csv | grep -v 'start' | sed 's/"//g' | awk -v FS=',' -v OFS='\t' '{print $1,$2,$3}' | sort -k1,1 -k2,2n > diff_alone_vs_30.bed3
# BASH csv to bed3
cat diff_15_vs_45.csv | grep -v 'start' | sed 's/"//g' | awk -v FS=',' -v OFS='\t' '{print $1,$2,$3}' | sort -k1,1 -k2,2n > diff_15_vs_45.bed3
```
### Compute all differential peaks union
```
# Compute all differential peaks union
T=$(mktemp)
bedtools multiinter -i $(find . -name "diff*.bed3") | awk -v OFS='\t' '{print $1,$2,$3}' > $T
bedtools merge -i $T > diff_pairwise.bed3
# Create DiffBind config
cd /data/2022_Atac-Seq-2/diffbind
echo "SampleID,Tissue,Factor,Condition,Replicate,bamReads,ControlID,bamControl,Peaks,PeakCaller" > config.csv
for F in Mac-15min Mac-30min_AC Mac-45min_AC MacAlone; do for R in 1 2 3 4; do SAMPLE="$F-$R"; BAMREADS=$(find /data/2022_Atac-Seq-2/bams_20mln -name "$F*-${R}_20mln.bam"); echo "$SAMPLE,,Type,$F,$R,$BAMREADS,,,/data/2022_Atac-Seq-2/diff_pairwise.bed3,bed" >> config.csv; done; done
# R code for counts and genes annotating
library(DiffBind)
samples = dba(sampleSheet = 'config.csv')
sample_counts = dba.count(samples)
peaksets <- dba.peakset(sample_counts, bRetrieve = TRUE)
library(EnsDb.Mmusculus.v79)
annoData <- toGRanges(EnsDb.Mmusculus.v79, feature="gene")
library(org.Mm.eg.db)
peaksets <- annotatePeakInBatch(peaksets, AnnotationData=annoData) # Full annotation
peaksets <- addGeneIDs(peaksets, "org.Mm.eg.db", IDs2Add = "entrez_id")
write.csv(peaksets, 'diff_pairwise_counts.csv', row.names = FALSE)
# BASH
cat diff_pairwise_counts.csv | sed 's/,/\t/g' > diff_pairwise_counts.tsv
```
# Integrate with DE RNA-seq data
```
# Inspect number of DE genes with respect to change sign
cd de_filtered_tables
for F in *.csv; do
echo $F;
cat $F | grep -v log2 | wc -l;
cat $F | grep -v log2 | awk -v FS=',' '($3<0) {print $1}' | wc -l;
cat $F | grep -v log2 | awk -v FS=',' '($3>0) {print $1}' | wc -l;
done
```
# Prepare peaks for annotation with related genes
```
for F in diff_alone_vs_45/alone_vs_45.csv diff_alone_vs_15/alone_vs_15.csv diff_alone_vs_30/alone_vs_30.csv \
diff_15_vs_30/diff_15_vs_30.csv diff_15_vs_45/diff_15_vs_45.csv diff_30_vs_45/diff_30_vs_45.csv; do
echo $F
wc -l $F
cat $F | grep -v seqname | awk -v FS=',' -v OFS='\t' '($9<0) {print $1,$2,$3}' | sed 's#"##g' |\
grep -v -E 'chr[^\t]*_' | sort -k1,1 -k2,2n > ${F/.csv/_appear.bed3}
wc -l ${F/.csv/_appear.bed3}
cat $F | grep -v seqname | awk -v FS=',' -v OFS='\t' '($9>0) {print $1,$2,$3}' | sed 's#"##g' |\
grep -v -E 'chr[^\t]*_' | sort -k1,1 -k2,2n > ${F/.csv/_disappear.bed3}
wc -l ${F/.csv/_disappear.bed3}
done
```
Annotate DE ATAC-seq peaks with related genes
```R
library(ChIPpeakAnno)
library(org.Mm.eg.db)
library(EnsDb.Mmusculus.v79)
annoData <- toGRanges(EnsDb.Mmusculus.v79, feature="gene")
FILES = c(
"/data/2022_Atac-Seq-2/macs2_20mln/all_peaks.bed3",
"/data/2022_Atac-Seq-2/diff_alone_vs_45/alone_vs_45_appear.bed3",
"/data/2022_Atac-Seq-2/diff_alone_vs_45/alone_vs_45_disappear.bed3",
"/data/2022_Atac-Seq-2/diff_alone_vs_15/alone_vs_15_appear.bed3",
"/data/2022_Atac-Seq-2/diff_alone_vs_15/alone_vs_15_disappear.bed3",
"/data/2022_Atac-Seq-2/diff_alone_vs_30/alone_vs_30_appear.bed3",
"/data/2022_Atac-Seq-2/diff_alone_vs_30/alone_vs_30_disappear.bed3",
"/data/2022_Atac-Seq-2/diff_15_vs_30/diff_15_vs_30_appear.bed3",
"/data/2022_Atac-Seq-2/diff_15_vs_30/diff_15_vs_30_disappear.bed3",
"/data/2022_Atac-Seq-2/diff_15_vs_45/diff_15_vs_45_appear.bed3",
"/data/2022_Atac-Seq-2/diff_15_vs_45/diff_15_vs_45_disappear.bed3",
"/data/2022_Atac-Seq-2/diff_30_vs_45/diff_30_vs_45_appear.bed3",
"/data/2022_Atac-Seq-2/diff_30_vs_45/diff_30_vs_45_disappear.bed3"
)
for (file in FILES) {
print(file)
peaks = toGRanges(file, format="BED", header=FALSE)
peaks.anno <- annotatePeakInBatch(peaks, AnnotationData=annoData, output="nearestBiDirectionalPromoters", bindingRegion=c(-2000, 500))
peaks.anno <- addGeneIDs(peaks.anno, "org.Mm.eg.db", IDs2Add = "entrez_id")
write.csv(as.data.frame(unname(peaks.anno)), paste(file, ".csv", sep=""))
}
```
Overlap genes from ATAC-seq DE and RNA-seq DE.\
Check different timepoints DE results vs peak in ATAC-seq.
```bash
mkdir -p rna-atac
# Filter RNA-seq DE genes versus ATAC-seq files
for DE in de_filtered_tables/45_vs_alone_filter_table.csv de_filtered_tables/90_vs_alone_filter_table.csv de_filtered_tables/180_vs_alone_filter_table.csv; do
for P in all_peaks.bed3.csv \
diff_alone_vs_45/alone_vs_45_appear.bed3.csv diff_alone_vs_45/alone_vs_45_disappear.bed3.csv \
diff_alone_vs_30/alone_vs_30_appear.bed3.csv diff_alone_vs_30/alone_vs_30_disappear.bed3.csv \
diff_alone_vs_15/alone_vs_15_appear.bed3.csv diff_alone_vs_15/alone_vs_15_disappear.bed3.csv \
diff_15_vs_30/diff_15_vs_30_appear.bed3.csv diff_15_vs_30/diff_15_vs_30_disappear.bed3.csv \
diff_15_vs_45/diff_15_vs_45_appear.bed3.csv diff_15_vs_45/diff_15_vs_45_disappear.bed3.csv \
diff_30_vs_45/diff_30_vs_45_appear.bed3.csv diff_30_vs_45/diff_30_vs_45_disappear.bed3.csv; do
echo "Differential expression $DE $(cat $DE | wc -l) vs peaks $P $(cat $P | wc -l)";
DEF=$(basename $DE | sed -E 's/\..*//');
PF=$(basename $P | sed -E 's/\..*//');
echo "Diff $DE < 0: $(cat $DE | grep -v log2 | awk -v FS=',' '($3<0) {print $8}' | wc -l) vs $P";
touch rna-atac/rna_${DEF}_gt0_vs_atac_${PF}.csv;
cat $DE | grep -v log2 | awk -v FS=',' '($3<0) {print $8}' | sed -E 's/\..*//' | while read -r ENS; do
grep $ENS $P; done > rna-atac/rna_${DEF}_gt0_vs_atac_${PF}.csv;
echo "Diff $DE > 0: $(cat $DE | grep -v log2 | awk -v FS=',' '($3>0) {print $8}' | wc -l) vs $P";
touch rna-atac/rna_${DEF}_ls0_vs_atac_${PF}.csv;
cat $DE | grep -v log2 | awk -v FS=',' '($3>0) {print $8}' | sed -E 's/\..*//' | while read -r ENS; do
grep $ENS $P; done > rna-atac/rna_${DEF}_ls0_vs_atac_${PF}.csv;
done;
done;
# Produce BED, TXT with gene name and Ensemble gene names
cd rna-atac
for F in *.csv; do
echo $F;
cat $F | awk -v FS=',' -v OFS='\t' '{print $2,$3,$4,$16,$12,$14,$15}' |\
sed 's/"//g' | sort -k1,1 -k2,2n > ${F/.csv/.bed};
cat $F | awk -v FS=',' '{print $16}' | sed 's/"//g' | sort --unique > ${F/.csv/_gene.txt};
cat $F | awk -v FS=',' '{print $8}' | sed 's/"//g' | sort --unique > ${F/.csv/_ens.txt};
wc -l ${F/.csv/_ens.txt};
done
```
Save positions of all ATAC-seq peaks associated with genes
```
cat all_peaks.bed3.csv | grep -v seqnames | awk -v FS=',' -v OFS='\t' '{print $2,$3,$4,$8,$12,$14,$15}' |\
sed 's/"//g' | sort -k1,1 -k2,2n > all_peaks.bed
cat all_peaks.bed3.csv | grep -v seqnames | awk -v FS=',' -v OFS='\t' '{print $8}' | sed 's/"//g' |\
sort --unique > all_peaks.txt
```
Analyze clusters of ATAC-seq data - launch `analysis.ipynb` jupyter notebook
```
cd ~/data/2022_Atac-Seq-2
perl ~/miniconda3/envs/bio/share/homer/bin/findMotifsGenome.pl diff_pairwise.bed3 mm10 diff_pairwise -mask
cd clusters
for F in *.bed3; do
perl ~/miniconda3/envs/bio/share/homer/bin/findMotifsGenome.pl $F mm10 ${F/.bed3/} -mask
done
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment