Skip to content

Instantly share code, notes, and snippets.

@yk-tanigawa
Created July 4, 2017 07:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yk-tanigawa/ea73b2ac63e20abe62bf4ba3f13c97e3 to your computer and use it in GitHub Desktop.
plink2 pgenlib.PgenWriter caused a segmentation fault
$ python pgen_write.py
2017-07-04 00:44:50,706 pgen_write DEBUG pgen write test script
2017-07-04 00:44:50,706 read_alleles_range DEBUG reading alleles range 0:10 from ./pgen_in.pgen
2017-07-04 00:44:50,725 read_alleles_range DEBUG The shape of the numpy nd-array is (10, 224676)
2017-07-04 00:44:50,725 pgen_write DEBUG shape of buffer is (10, 224676)
2017-07-04 00:44:50,727 pgen_write DEBUG writing SNP 0 of 10 ...
2017-07-04 00:44:50,728 pgen_write DEBUG writing SNP 1 of 10 ...
2017-07-04 00:44:50,730 pgen_write DEBUG writing SNP 2 of 10 ...
2017-07-04 00:44:50,731 pgen_write DEBUG writing SNP 3 of 10 ...
2017-07-04 00:44:50,733 pgen_write DEBUG writing SNP 4 of 10 ...
2017-07-04 00:44:50,734 pgen_write DEBUG writing SNP 5 of 10 ...
2017-07-04 00:44:50,736 pgen_write DEBUG writing SNP 6 of 10 ...
2017-07-04 00:44:50,737 pgen_write DEBUG writing SNP 7 of 10 ...
Segmentation fault
from __future__ import print_function
import sys
import os
import logging
from logging.config import dictConfig
import numpy as np
import collections as cl
import pgenlib as pg
# Logging setup: a single stream handler ('h') using formatter 'f' is attached
# to the root logger, and both the handler and the root logger are set to
# DEBUG so every message emitted by the named loggers below is shown.
logging_config = {
    'version': 1,
    'formatters': {
        'f': {
            'format': '%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        },
    },
    'handlers': {
        'h': {
            'class': 'logging.StreamHandler',
            'formatter': 'f',
            'level': logging.DEBUG,
        },
    },
    'root': {
        'handlers': ['h'],
        'level': logging.DEBUG,
    },
}
dictConfig(logging_config)
def read_alleles_range(pgen_f, rangeStart, rangeEnd):
    """Read the alleles of variants ``rangeStart`` up to ``rangeEnd`` from a pgen file.

    Args:
        pgen_f: Path of the pgen file, handed to ``pgenlib.PgenReader``.
        rangeStart: Index of the first variant to read.
        rangeEnd: End of the variant range; the result has
            ``rangeEnd - rangeStart`` rows.

    Returns:
        An ``int32`` numpy array with one row per variant and
        ``2 * raw sample count`` columns, filled in by
        ``PgenReader.read_alleles_range``.
    """
    log = logging.getLogger('read_alleles_range')
    log.debug(
        'reading alleles range {}:{} from {}'.format(rangeStart, rangeEnd, pgen_f)
    )

    with pg.PgenReader(pgen_f) as reader:
        # One row per requested variant, two allele slots per sample.
        variant_ct = rangeEnd - rangeStart
        allele_ct = reader.get_raw_sample_ct() * 2
        alleles = np.zeros((variant_ct, allele_ct), dtype=np.int32)

        # The reader writes into the pre-allocated array in place.
        reader.read_alleles_range(rangeStart, rangeEnd, alleles)
        log.debug(
            'The shape of the numpy nd-array is {}'.format(alleles.shape)
        )

    return alleles
def pgen_write(pgen_in, pgen_out, rangeStart, rangeEnd):
    """Copy a range of variants from one pgen file into a new pgen file.

    The alleles are loaded with ``read_alleles_range`` and then appended to a
    ``pgenlib.PgenWriter`` one variant (one buffer row) at a time.

    Args:
        pgen_in: Path of the pgen file to read from.
        pgen_out: Path of the pgen file to write.
        rangeStart: Index of the first variant to copy.
        rangeEnd: End of the variant range; ``rangeEnd - rangeStart``
            variants are copied.
    """
    logger_pgen_write = logging.getLogger('pgen_write')
    logger_pgen_write.debug('pgen write test script')

    buf = read_alleles_range(pgen_in, rangeStart, rangeEnd)
    logger_pgen_write.debug('shape of buffer is {}'.format(buf.shape))

    # Rows are variants; each row carries two allele entries per sample.
    variant_ct, allele_ct = buf.shape
    # Floor division: plain `/` is true division under Python 3 and would hand
    # the writer a float sample count instead of an integer.
    sample_ct = allele_ct // 2

    # NOTE(review): pgenlib's documented signatures take a `bytes` filename;
    # under Python 3 a str path may need encoding first -- confirm.
    # NOTE(review): the fourth positional argument (True) is presumably
    # pgenlib's `nonref_flags` -- confirm against the installed version.
    with pg.PgenWriter(pgen_out, sample_ct, variant_ct, True) as pgw:
        for snp_pos in range(rangeEnd - rangeStart):
            logger_pgen_write.debug(
                'writing SNP {} of {} ...'.format(snp_pos, rangeEnd - rangeStart)
            )
            pgw.append_alleles(buf[snp_pos])
        # NOTE: the batch alternative, pgw.append_alleles_batch(buf), was
        # reported by the author not to work either.
def main():
    """Run the write test: copy variants 0 up to 10 of the input pgen file."""
    source_path = './pgen_in.pgen'
    target_path = './pgen_out.pgen'
    pgen_write(source_path, target_path, rangeStart=0, rangeEnd=10)
# Only run the test when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment