Last active
March 20, 2019 09:09
-
-
Save Bek/26177bab95f352f9e88627f07e152567 to your computer and use it in GitHub Desktop.
This code looks at methylation region in base pair resolution and gets ratio of signal values in C to G nucleotides in ENCODE whole genome bisulfite sequencing experiment ENCSR481JIW
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pyBigWig\n", | |
"import numpy as np\n", | |
"from collections import Counter\n", | |
"from seqseek import Chromosome, BUILD38" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"read1 = pyBigWig.open('/data/ENCFF527HXB.bigWig')\n", | |
"read2 = pyBigWig.open('/data/ENCFF401QUB.bigWig')\n", | |
"reads = [read1, read2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Chromosome 1 C to G methylation signal ratio: 0.9947901244169821\n", | |
"Chromosome 2 C to G methylation signal ratio: 0.9995215202394079\n", | |
"Chromosome 3 C to G methylation signal ratio: 0.998893114655167\n", | |
"Chromosome 4 C to G methylation signal ratio: 1.001233729506295\n", | |
"Chromosome 5 C to G methylation signal ratio: 0.9975373858762787\n", | |
"Chromosome 6 C to G methylation signal ratio: 1.0010544682421834\n", | |
"Chromosome 7 C to G methylation signal ratio: 0.9986058016259722\n", | |
"Chromosome 8 C to G methylation signal ratio: 1.004084784773276\n", | |
"Chromosome 9 C to G methylation signal ratio: 1.0038816362846827\n", | |
"Chromosome 10 C to G methylation signal ratio: 0.9974756356634797\n", | |
"Chromosome 11 C to G methylation signal ratio: 1.0006294154908835\n", | |
"Chromosome 12 C to G methylation signal ratio: 1.0004684806744195\n", | |
"Chromosome 13 C to G methylation signal ratio: 0.9993184161932953\n", | |
"Chromosome 14 C to G methylation signal ratio: 0.9911055400250699\n", | |
"Chromosome 15 C to G methylation signal ratio: 1.001436609779566\n", | |
"Chromosome 16 C to G methylation signal ratio: 0.995312233175189\n", | |
"Chromosome 17 C to G methylation signal ratio: 1.0016678137609394\n", | |
"Chromosome 18 C to G methylation signal ratio: 1.0040475582392323\n", | |
"Chromosome 19 C to G methylation signal ratio: 0.9988249937330925\n", | |
"Chromosome 20 C to G methylation signal ratio: 0.9906796970343911\n", | |
"Chromosome 21 C to G methylation signal ratio: 0.9966838449473449\n", | |
"Chromosome 22 C to G methylation signal ratio: 1.0014272906485193\n", | |
"Chromosome 1 C to G methylation signal ratio: 0.9960526910656856\n", | |
"Chromosome 2 C to G methylation signal ratio: 1.000064600031073\n", | |
"Chromosome 3 C to G methylation signal ratio: 0.9996395322925136\n", | |
"Chromosome 4 C to G methylation signal ratio: 1.0036357024837599\n", | |
"Chromosome 5 C to G methylation signal ratio: 0.9972686511809082\n", | |
"Chromosome 6 C to G methylation signal ratio: 1.0008522704185978\n", | |
"Chromosome 7 C to G methylation signal ratio: 0.9982317594122002\n", | |
"Chromosome 8 C to G methylation signal ratio: 1.0032637441791028\n", | |
"Chromosome 9 C to G methylation signal ratio: 1.0025260326053047\n", | |
"Chromosome 10 C to G methylation signal ratio: 0.9983249756881316\n", | |
"Chromosome 11 C to G methylation signal ratio: 1.0001001631174917\n", | |
"Chromosome 12 C to G methylation signal ratio: 1.0003598612137274\n", | |
"Chromosome 13 C to G methylation signal ratio: 0.9998022967231733\n", | |
"Chromosome 14 C to G methylation signal ratio: 0.9922772203244011\n", | |
"Chromosome 15 C to G methylation signal ratio: 1.0010901066708042\n", | |
"Chromosome 16 C to G methylation signal ratio: 0.9945628438572668\n", | |
"Chromosome 17 C to G methylation signal ratio: 1.000795346750636\n", | |
"Chromosome 18 C to G methylation signal ratio: 1.003130970029054\n", | |
"Chromosome 19 C to G methylation signal ratio: 0.9998983129938526\n", | |
"Chromosome 20 C to G methylation signal ratio: 0.9913987095500603\n", | |
"Chromosome 21 C to G methylation signal ratio: 0.9960734326696262\n", | |
"Chromosome 22 C to G methylation signal ratio: 1.0029223465935144\n", | |
"Overall C to G methylation signal ratio: 0.9992082399289819\n" | |
] | |
} | |
], | |
"source": [ | |
"overall_counter = Counter()\n", | |
"for file in reads:\n", | |
" for chrom in range(1, 23):\n", | |
" chrom_length = file.chroms()['chr' + str(chrom)] # Integer representing the length of a chromosome\n", | |
" values = file.values('chr' + str(chrom), 0, chrom_length, numpy=True) # All values for entire range of a chromosome\n", | |
" sequence = Chromosome(chrom, assembly=BUILD38).sequence(0,chrom_length)\n", | |
" sequence = np.array(list(str(sequence)))\n", | |
" # The most exciting results emerge from comparison of wrong sequences\n", | |
" # Exciting != Meaningful\n", | |
" assert len(values) == len(sequence) \n", | |
" # We will convert nan values to zeros\n", | |
" values = np.nan_to_num(values)\n", | |
" # We have an array of entire chromosome signal values\n", | |
" # We are only interested in indices where we have a signal\n", | |
" meaningfull_indices = np.argwhere(values > 0).flatten()\n", | |
" # The following line shows that signal values are contiguous across C followed by G nucleotides\n", | |
" # print(meaningful_indices) \n", | |
" counter = Counter()\n", | |
" for index_bp_nucleotide in meaningfull_indices:\n", | |
" counter[sequence[index_bp_nucleotide]] += 1\n", | |
" overall_counter[sequence[index_bp_nucleotide]] += 1\n", | |
" # Printing ratio of C to G (CpG)\n", | |
" print(\"Chromosome {} C to G methylation signal ratio: {}\".format(chrom, counter['C']/counter['G']))\n", | |
"\n", | |
"# Ratio of C to G methylation across all chromosomes\n", | |
"print(\"Overall C to G methylation signal ratio: {}\".format(overall_counter['C']/overall_counter['G']))\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Counter({'C': 116261306, 'G': 116353430})" | |
] | |
}, | |
"execution_count": 87, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"overall_counter" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment