Skip to content

Instantly share code, notes, and snippets.

@chponte
Created July 21, 2022 12:39
Show Gist options
  • Save chponte/04ffc85eaf12c5c283f2ecdc60d18abf to your computer and use it in GitHub Desktop.
Save chponte/04ffc85eaf12c5c283f2ecdc60d18abf to your computer and use it in GitHub Desktop.
Ad hoc filter script
from random import randint
import pandas as pd
def _copy_vcf_header(fin, fout):
    """Copy the header of *fin* to *fout* and return the column names.

    Every line up to and including the first one starting with ``#CHROM``
    is written to *fout* unchanged. The column names are obtained by
    stripping that ``#CHROM`` line and splitting it on tabs.

    Raises:
        ValueError: if *fin* is exhausted without a ``#CHROM`` line.
    """
    for line in fin:
        fout.write(line)
        if line.startswith("#CHROM"):
            return line.strip().split("\t")
    # Without this guard a file lacking the column header would never
    # terminate the scan (readline() keeps returning "" at EOF).
    raise ValueError("no '#CHROM' column header line found in input")


def main(
    input_path="/home/chponte/Descargas/Bmedia_15MD_002MAF_filtered.vcf",
    output_path="/home/chponte/Descargas/Bmedia_15MD_002MAF_filtered.new.vcf",
):
    """Write a copy of a VCF file keeping one random record per ``#CHROM``.

    The header (everything up to and including the ``#CHROM`` line) is
    copied verbatim. The remaining tab-separated records are grouped by
    their ``#CHROM`` value, in order of first appearance, and a single
    record chosen with ``random.randint`` is written for each group.

    Args:
        input_path: path of the VCF file to read.
        output_path: path of the VCF file to create. It is opened in
            exclusive-creation mode, so it must not exist yet.

    Raises:
        FileExistsError: if *output_path* already exists.
        ValueError: if the input has no ``#CHROM`` header line.
    """
    import csv

    with open(output_path, "x") as fout, open(input_path, "r") as fin:
        columns = _copy_vcf_header(fin, fout)
        # Read every field as a plain string with no NA detection and no
        # quote handling, so records are written back exactly as they
        # were read ("NA", "007", '"' ... are left untouched) and the
        # grouping key never ends up with mixed int/str values.
        df = pd.read_csv(
            fin,
            sep="\t",
            names=columns,
            header=None,
            dtype=str,
            na_filter=False,
            quoting=csv.QUOTE_NONE,
        )
        for _, sub_df in df.groupby("#CHROM", sort=False):
            index = randint(0, len(sub_df) - 1)
            # Slice (rather than index) to keep a one-row DataFrame,
            # which to_csv writes as a single tab-separated line.
            chosen = sub_df.iloc[index:index + 1, :]
            chosen.to_csv(
                fout,
                sep="\t",
                header=False,
                index=False,
                quoting=csv.QUOTE_NONE,
            )
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment