Skip to content

Instantly share code, notes, and snippets.

@timoast
Last active December 8, 2022 09:12
Show Gist options
  • Save timoast/af73c0e9fac00187ee49 to your computer and use it in GitHub Desktop.
Save timoast/af73c0e9fac00187ee49 to your computer and use it in GitHub Desktop.
Calculate paired-end insert size mean and standard deviation, excluding outliers.
#! /usr/local/bin/python2.7
"""
mean_size.py
Created by Tim Stuart
"""
import numpy as np
def get_data(inp):
    """
    Collect the positive integers found in the ninth whitespace-separated
    field of every non-header line of ``inp``.

    Lines starting with '@' are treated as headers and skipped, blank
    lines are skipped as well, and values that are zero or negative are
    dropped.
    NOTE(review): the layout looks like SAM alignment records, where the
    ninth column would be the template length -- confirm with the caller.

    inp -- iterable of text lines (an open file, sys.stdin, a list, ...)

    Returns a list of int, in input order.
    Raises IndexError for a non-blank line with fewer than nine fields and
    ValueError when the ninth field is not an integer.
    """
    lengths = []
    for line in inp:
        if line.startswith('@'):
            continue
        fields = line.split()
        if not fields:
            # A blank line (typically a trailing newline at the end of the
            # input) carries no record; skip it instead of failing on the
            # field lookup below.
            continue
        length = int(fields[8])
        if length > 0:
            lengths.append(length)
    return lengths
def reject_outliers(data, m=2.):
    """
    Remove, in place, every value of ``data`` that lies more than ``m``
    standard deviations away from the median.

    The median and standard deviation are computed once, from the data as
    passed in, before anything is removed.

    data -- list of numbers; modified in place
    m    -- cutoff, expressed in standard deviations (default 2)

    Returns None. An empty list is left untouched.
    """
    if not data:
        # Nothing to filter; also avoids NumPy's NaN result and
        # RuntimeWarning for the median of an empty sequence.
        return
    median = np.median(data)
    cutoff = m * np.std(data)
    # Filter in a single pass and assign back through a slice so the
    # caller's list object is updated. Calling data.remove() while looping
    # over data skips the element that follows each removal, which lets
    # consecutive outliers survive.
    # The test is written as "not >" so values that compare False (NaN) are
    # kept, exactly as the original removal condition would keep them.
    data[:] = [item for item in data if not abs(item - median) > cutoff]
def calc_size(data):
    """
    Return ``(mean, standard deviation)`` of ``data`` as a tuple of ints.

    Both statistics are computed with NumPy and then truncated toward
    zero by ``int()``.
    """
    # The mean is evaluated and converted before the standard deviation.
    return tuple(int(stat(data)) for stat in (np.mean, np.std))
if __name__ == "__main__":
    # Script entry point: read records from stdin, drop outlying lengths,
    # and print "<mean> <std>" on one line.
    import sys

    lengths = get_data(sys.stdin)
    reject_outliers(lengths)
    if not lengths:
        # Without any usable length the statistics are undefined; exit with
        # a readable message (status 1) rather than a NaN conversion
        # traceback.
        sys.exit("mean_size.py: no positive insert sizes found on stdin")
    mn, std = calc_size(lengths)
    # Formatting into a single string keeps the output identical whether
    # this runs under the Python 2 print statement or the Python 3 print
    # function.
    print("%d %d" % (mn, std))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment