odinokov

## VCF.py
"""
VCF.py
Kamil Slowikowski
October 30, 2013

Read VCF files. Works with gzip compressed files and pandas.

Note: This module ignores the genotype columns because
      I didn't need them at the time of writing.

## read_vcf.py
#!/usr/bin/env python

import io
import os
import pandas as pd


def read_vcf(path):
    with open(path, 'r') as f:
        lines = [l for l in f if not l.startswith('##')]

## sort_me.py
'''
Sorting Pandas dataframe by frequency of occurrence in one column
'''
df_ = df.copy()
df_['count'] = df_.groupby('myTAG')['myTAG'].transform('count')
df_ = df_.sort_values(by=['count']).drop(['count'], axis=1).reset_index(drop=True)
df = df_

## combine_vcf_files.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (C) 2016 by Gaik Tamazian
# mail (at) gtamazian (dot) com

import argparse
import glob
import logging
import os

## Add dbSNP IDs to a VCF file
#GATK Method <- Slower and keeps original ID plut dbSNP rsID
# R=Reference FASTA
# V=VCF file to add IDs to
# --dbsnp = dbsnp VCF -- download from NCBI FTP

java -jar GenomeAnalysisTK.jar -R /reference/Homo_sapiens_assembly19.fasta -T VariantAnnotator -V vcf_to_add_id_to.vcf --dbsnp /reference/dbsnp_137.b37.vcf.gz --out /data/Broad.chr1.annotated.vcf

#bcftools Method <- Faster, replaces existing ID with dbSNP rsID
/usr/bin/htslib/bcftools/bcftools annotate -a /reference/dbsnp_137.b37.vcf.gz -c ID vcf_to_add_id_to.vcf

## linear_least_squares.py
#  Copyright (c) 2013 Alexandre Drouin. All rights reserved.
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy of
#  this software and associated documentation files (the "Software"), to deal in
#  the Software without restriction, including without limitation the rights to
#  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
#  of the Software, and to permit persons to whom the Software is furnished to do
#  so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all

## seq_entropy&complexity.py
from entropy import * # https://raphaelvallat.com/entropy/build/html/index.html
import hfda # https://pypi.org/project/hfda/ Higuchi Fractal Dimension Analysis
import numpy as np

L1 = '000000000011111111110' # less patterns
L2 = '010101010101010101011' # more patterns

def seq2list(l):
    return np.array(list(map(int, list(l))))

## sliding_window.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Sliding windows module.
"""

import numpy as np

from numpy.lib.stride_tricks import as_strided

## README.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                odinokov
                / README.md
            
            
              Created
              June 6, 2021 06:56
                — forked from wckdouglas/README.md
            
              
                Running deseq2 in python
              
          
    I have reused the code enough to make a package out of it.
The python package is deposited at Github. with an example in Jupyter notebook.

  
## mode.py
import numpy

def mode(ndarray, axis=0):

    '''
    from https://stackoverflow.com/questions/16330831/most-efficient-way-to-find-mode-in-numpy-array
    '''
    # Check inputs
    ndarray = numpy.asarray(ndarray)
    ndim = ndarray.ndim
	"""
	VCF.py
	Kamil Slowikowski
	October 30, 2013

	Read VCF files. Works with gzip compressed files and pandas.

	Note: This module ignores the genotype columns because
	I didn't need them at the time of writing.
	#!/usr/bin/env python

	import io
	import os
	import pandas as pd


	def read_vcf(path):
	with open(path, 'r') as f:
	lines = [l for l in f if not l.startswith('##')]
	'''
	Sorting Pandas dataframe by frequency of occurrence in one column
	'''
	df_ = df.copy()
	df_['count'] = df_.groupby('myTAG')['myTAG'].transform('count')
	df_ = df_.sort_values(by=['count']).drop(['count'], axis=1).reset_index(drop=True)
	df = df_
	#!/usr/bin/env python
	# -- coding: utf-8 --

	# Copyright (C) 2016 by Gaik Tamazian
	# mail (at) gtamazian (dot) com

	import argparse
	import glob
	import logging
	import os
	#GATK Method <- Slower and keeps original ID plut dbSNP rsID
	# R=Reference FASTA
	# V=VCF file to add IDs to
	# --dbsnp = dbsnp VCF -- download from NCBI FTP

	java -jar GenomeAnalysisTK.jar -R /reference/Homo_sapiens_assembly19.fasta -T VariantAnnotator -V vcf_to_add_id_to.vcf --dbsnp /reference/dbsnp_137.b37.vcf.gz --out /data/Broad.chr1.annotated.vcf

	#bcftools Method <- Faster, replaces existing ID with dbSNP rsID
	/usr/bin/htslib/bcftools/bcftools annotate -a /reference/dbsnp_137.b37.vcf.gz -c ID vcf_to_add_id_to.vcf
	# Copyright (c) 2013 Alexandre Drouin. All rights reserved.
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy of
	# this software and associated documentation files (the "Software"), to deal in
	# the Software without restriction, including without limitation the rights to
	# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	# of the Software, and to permit persons to whom the Software is furnished to do
	# so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all
	from entropy import * # https://raphaelvallat.com/entropy/build/html/index.html
	import hfda # https://pypi.org/project/hfda/ Higuchi Fractal Dimension Analysis
	import numpy as np

	L1 = '000000000011111111110' # less patterns
	L2 = '010101010101010101011' # more patterns

	def seq2list(l):
	return np.array(list(map(int, list(l))))
	#!/usr/bin/env python
	# -- coding: utf-8 --
	"""
	Sliding windows module.
	"""

	import numpy as np

	from numpy.lib.stride_tricks import as_strided
	import numpy

	def mode(ndarray, axis=0):

	'''
	from https://stackoverflow.com/questions/16330831/most-efficient-way-to-find-mode-in-numpy-array
	'''
	# Check inputs
	ndarray = numpy.asarray(ndarray)
	ndim = ndarray.ndim