Skip to content

Instantly share code, notes, and snippets.

@Phlya
Created July 11, 2023 12:00
Show Gist options
  • Save Phlya/9f461871c5dd97015fa51b5b9e83c3d2 to your computer and use it in GitHub Desktop.
Save Phlya/9f461871c5dd97015fa51b5b9e83c3d2 to your computer and use it in GitHub Desktop.
uneuploidy_quaich_config.yml
bedname file format distal by_distance local local_rescaled by_strand_by_distance by_strand_local by_strand_distal by_window_short_range by_window_long_range distal_rescaled local_rescaled_by_chrom_group distal_rescaled_by_chrom_group
cLADs resources/annotations/cLADs.bed bed - - - - - - - - - - x x
sample file do_tads celltype sex chromosomes
sHF18 /store/razinlab/flyamer/subsampled_coolers/sHF18.hg38.mapq_30.1000.sampled.mcool False fibroblasts female +18
sPFCH6 /store/razinlab/flyamer/subsampled_coolers/sPFCH6.hg38.mapq_30.1000.sampled.mcool False fibroblasts female euploid
s3220 /store/razinlab/flyamer/subsampled_coolers/s3220.hg38.mapq_30.1000.sampled.mcool False chorion male +Y
s3493 /store/razinlab/flyamer/subsampled_coolers/s3493.hg38.mapq_30.1000.sampled.mcool False chorion female +X
s3471 /store/razinlab/flyamer/subsampled_coolers/s3471.hg38.mapq_30.1000.sampled.mcool False chorion male +16
s3475 /store/razinlab/flyamer/subsampled_coolers/s3475.hg38.mapq_30.1000.sampled.mcool False chorion female +13
s3496 /store/razinlab/flyamer/subsampled_coolers/s3496.hg38.mapq_30.1000.sampled.mcool False chorion female +13
s3495 /store/razinlab/flyamer/subsampled_coolers/s3495.hg38.mapq_30.1000.sampled.mcool False chorion female -X
s3492 /store/razinlab/flyamer/subsampled_coolers/s3492.hg38.mapq_30.1000.sampled.mcool False chorion female euploid
s3524 /store/razinlab/flyamer/subsampled_coolers/s3524.hg38.mapq_30.1000.sampled.mcool False chorion female euploid
s3525 /store/razinlab/flyamer/subsampled_coolers/s3525.hg38.mapq_30.1000.sampled.mcool False chorion female euploid
s3494 /store/razinlab/flyamer/subsampled_coolers/s3494.hg38.mapq_30.1000.sampled.mcool False chorion male euploid
s3518 /store/razinlab/flyamer/subsampled_coolers/s3518.hg38.mapq_30.1000.sampled.mcool False chorion male euploid
IsoE /store/razinlab/flyamer/subsampled_coolers/IsoE.hg38.mapq_30.1000.sampled.mcool False iPSC unknown
IsoT /store/razinlab/flyamer/subsampled_coolers/IsoT.hg38.mapq_30.1000.sampled.mcool False iPSC unknown
NPC_IsoE /store/razinlab/flyamer/subsampled_coolers/NPC_IsoE.hg38.mapq_30.1000.sampled.mcool False NPC unknown
NPC_IsoT /store/razinlab/flyamer/subsampled_coolers/NPC_IsoT.hg38.mapq_30.1000.sampled.mcool False NPC unknown
NPC_MaE /store/razinlab/flyamer/subsampled_coolers/NPC_MaE.hg38.mapq_30.1000.sampled.mcool False NPC unknown
NPC_MaT /store/razinlab/flyamer/subsampled_coolers/NPC_MaT.hg38.mapq_30.1000.sampled.mcool False NPC unknown
sample cLADs
sHF18 x
sPFCH6 x
s3220 x
s3493 x
s3471 x
s3475 x
s3496 x
s3495 x
s3492 x
s3524 x
s3525 x
s3494 x
s3518 x
IsoE x
IsoT x
NPC_IsoE x
NPC_IsoT x
NPC_MaE x
NPC_MaT x
# Set up the genome and the view (set of regions) to use for analyses.
# Name of the genome assembly.
genome: hg38
# Folder holding genome resource files.
path_genome_folder: resources/genome/
# Genome sequence in FASTA format; here a relative path two directories above
# the working directory.
path_genome_fasta: ../../common_data/genomes/hg38/hg38.fa
# Chromosome sizes file for the genome.
chromsizes: resources/genome/hg38_chromsizes.txt
# View file. NOTE(review): the file name suggests chromosome arms - confirm.
view: resources/hg38_arms.txt
# Samples file with at least two columns: sample name ("sample") and path/URL of the
# cool file ("file"). Further columns can be referenced by name in fields_to_match
# and fields_to_differ.
# If "file" is a URL, it will be downloaded. If the file is already downloaded, it
# will not be overwritten.
# The downloaded file will be stored as {coolers_folder}/{sample}.mcool
samples: config/samples.tsv
# Setting up pairwise comparisons of samples.
# Columns of the samples.tsv file whose values must match between the two samples
# of a comparison (set to null when no fields need to match).
fields_to_match:
  - celltype
  - sex
# Column of the samples.tsv file that the comparison is made by, mapped to the value
# we consider as reference. The reference value is used to set "control" samples,
# with the rest being "treatment".
fields_to_differ:
  chromosomes: euploid
# Annotations file with two columns: annotation name ("bedname") and "file" (URLs or local file)
# Downloaded bed files will be stored in beds_folder
annotations: config/annotations.tsv
# Rules about correspondence between samples and annotations can be specified here
samples_annotations_combinations: config/samples_annotations.tsv
# Folder definitions are optional.
# By default everything is done using inputs in inputs/
# and results are saved in results/
# Input locations
inputs_folder: inputs
beds_folder: inputs/beds
bedpes_folder: inputs/bedpes
coolers_folder: inputs/coolers
# Result locations, one per analysis type
project_folder: results
expected_folder: results/expected
pileups_folder: results/pileups
eigenvectors_folder: results/eigenvectors
eig_profiles_folder: results/eigenvectors/eig_profiles
compartments_folder: results/eigenvectors/compartments
saddles_folder: results/eigenvectors/saddles
pentads_folder: results/eigenvectors/pentads
insulation_folder: results/insulation
tads_folder: results/insulation/tads
boundaries_folder: results/insulation/boundaries
dots_folder: results/dots
# Parameters for calculation of expected, in cis and in trans
expected:
  do: True  # If False, they will still be calculated when needed by other rules
  cis: True
  trans: True
  resolutions:
    - 1000
    - 10000
    - 100000
    - 250000
    - 500000
    - 1000000
  # Extra command-line arguments, given separately for the cis and trans calculation
  extra_args_cis: "--smooth --aggregate-smoothed --clr-weight-name weight_cis"
  extra_args_trans: "--clr-weight-name weight_trans"
# Parameters for calculation of the eigenvectors (cooltools eigs-cis or eigs-trans)
# They are phased using GC content, which is automatically calculated from the genome
# sequence. NOTE(review): presumably the FASTA given in path_genome_fasta - confirm.
eigenvectors:
  do: True
  cis: True
  trans: False
  resolutions:
    - 100000
    - 250000
    - 500000
    - 1000000
  # If True, will run a simple HMM to save a .bed file
  # with coordinates of A and B compartments
  save_compartment_beds: True
# Parameters for saddles - a way to quantify global compartment structure (cooltools saddle)
saddles:
  do: True
  bins:
    - 100
  distance_limits:
    - 1600000
    - 102400000
  # The trailing space inside the quotes is kept exactly as it was.
  # NOTE(review): presumably harmless when the arguments are joined - confirm before trimming.
  range: "--qrange 0.01 0.99 "
  extra: "--strength"
# Parameters for pentads, another way to average compartments
# Described in https://doi.org/10.1186/s12859-022-04654-6, but reimplemented using coolpuppy here
pentads:
  do: False
  do_diff: False
  # Resolutions used for the contact data and for the eigenvector, set independently
  data_resolution: 100000
  eigenvector_resolution: 250000
  groupby:
    - chrom1
  norms:
    - expected
  modes:
    - local
    - distal
# Parameters for pileups using coolpuppy
pileups:
  do: True
  resolutions:
    # - 10000
    - 50000
    # - 100000
  distance_limits:
    - 500000
    - 100000000
    # - 12500
    # - 102400000
  shifts: 0
  expected: True
  ooe: True  # Only applies when expected is True
  # Different modes of pileups that can be applied to different annotation
  # .bed files, relationship defined in samples_annotations_combinations file above.
  # Each key is a mode name and each value is the argument string for that mode.
  # Commented-out modes are disabled.
  arguments:
    # distal: "--mindist 500000 --clr-weight-name weight_cis"
    # by_distance: "--by_distance --clr-weight-name weight_cis"
    # local: "--local --clr-weight-name weight_cis"
    local_rescaled: "--local --rescale --rescale_pad 1 --clr-weight-name weight_cis"
    local_rescaled_by_chrom_group: "--local --rescale --rescale_pad 1 --clr-weight-name weight_cis --groupby chrom_group1"
    distal_rescaled: "--mindist 500000 --rescale --rescale_pad 1 --clr-weight-name weight_cis"
    distal_rescaled_by_chrom_group: "--mindist 500000 --rescale --rescale_pad 1 --clr-weight-name weight_cis --groupby chrom_group1"
    # by_strand_by_distance: "--by_strand --by_distance"
    # by_strand_local: "--by_strand --local"
    # by_strand_distal: "--by_strand --maxdist 1000000"
    # by_window_short_range: "--by_window --subset 1000 --maxdist 2000000"
    # by_window_long_range: "--by_window --subset 1000 --mindist 2000000"
# Parameters for calling dots/loops, using different methods
dots:
  # One entry per calling method; each is switched on or off with its own "do"
  # and takes its own extra arguments.
  methods:
    cooltools:
      do: False
      extra: "--max-loci-separation 10000000 --fdr 0.02"
    chromosight:
      do: False
      extra: ""
    mustache:
      do: False
      max_dist: 10000000
      extra: "-pt 0.05 -st 0.8"
  resolutions:
    - 10000
  pileup: True
# Parameters to calculate insulation score and how to threshold (cooltools insulation)
insulation:
  do: False
  # What window sizes to do for what resolution (resolution: list of window sizes)
  resolutions:
    1000:
      - 3000
      - 5000
      - 10000
      - 25000
    10000:
      - 50000
      - 100000
  # For the threshold, choose between a specific float, Li or Otsu,
  # see https://github.com/open2c/open2c_examples/blob/master/Insulation_and_boundaries.ipynb
  extra: "--chunksize 1000000000 --threshold Li --ignore-diags 2"
  pileup: True  # Whether to pileup thresholded boundaries
# Find differential boundaries between pairs of samples using a simple
# presence/absence or FC (fold change) threshold for boundary strength
compare_boundaries:
  do: False
  fold_change_threshold: 5
  pileup: True
# Combine neighbouring strong insulation score boundaries into TADs
TADs:
  do: False
  # What window sizes to do for what resolution (resolution: list of window sizes)
  resolutions:
    10000:
      - 50000
      - 100000
      - 1000000
  max_tad_length: 1500000
  pileup: True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment