Created
March 13, 2022 20:50
-
-
Save xwu/813c71c047b25513ec717e271aca72aa to your computer and use it in GitHub Desktop.
Command line utility for encoding and decoding SEQC 3-bit representations of DNA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import sys | |
from functools import reduce | |
from typing import Iterator | |
def encode(raw: str) -> int:
    """Pack a DNA string into a single integer, three bits per base.

    Every byte of ``raw`` (UTF-8 encoded) is turned into a 3-bit code with
    ``byte % 32 % 5 + 3``.  For the ASCII letters this gives
    T=3, A=4, G=5, C=6 and N=7; the ``% 32`` step makes lower-case letters
    produce the same code as their upper-case form.  Codes are accumulated
    most-significant first, so the first base ends up in the highest bits.
    Because the smallest code is 3, a leading base never vanishes as a
    leading zero.

    Characters other than A/C/G/T/N are not rejected; they map to whichever
    code the formula happens to yield.

    Args:
        raw: The sequence text to pack.

    Returns:
        The packed integer, or ``0`` for an empty string.
    """
    # Start from 0 so that an empty sequence is representable instead of
    # failing the way a seedless ``reduce`` does on an empty iterable.
    packed = 0
    for byte in raw.encode():
        packed = packed * 8 + (byte % 32 % 5 + 3)
    return packed
def _decode(cooked: int) -> Iterator[int]: | |
temporary = cooked | |
while temporary > 0: | |
yield (84, 65, 71, 67, 78)[(temporary & 7) - 3] | |
temporary //= 8 | |
def decode(cooked: int) -> str:
    """Turn a packed 3-bit integer back into its DNA string.

    ``_decode`` reads the integer from its least-significant end and so
    produces the bases last-first; they are reversed here to restore the
    original order before being converted to text.

    Args:
        cooked: The packed integer.

    Returns:
        The decoded sequence as a string.
    """
    codes = list(_decode(cooked))
    codes.reverse()
    return bytes(codes).decode()
def main() -> int:
    """Convert delimited columns read from standard input, line by line.

    Command-line options:
        -s / --separator  Column separator (default ``,``).
        -e / --encode     Indexes of the columns to encode (sequence -> int).
        -d / --decode     Indexes of the columns to decode (int -> sequence).

    Indexes are used directly as list indexes into the split line.  When
    neither ``-e`` nor ``-d`` is given, every column is decoded.  Converted
    columns have surrounding whitespace stripped; other columns are written
    back unchanged.  Empty lines are passed through as empty lines.

    Returns:
        ``0``, intended as the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--separator', default=',')
    parser.add_argument('-e', '--encode', nargs='+', type=int, default=[])
    parser.add_argument('-d', '--decode', nargs='+', type=int, default=[])
    args = parser.parse_args()
    decode_everything = not args.encode and not args.decode
    for line in sys.stdin:
        # Remove the line terminator before splitting: otherwise an
        # unconverted last column keeps its '\n' and print() adds a second
        # one, emitting a blank line after every record.
        line = line.rstrip('\n')
        if not line:
            # Nothing to convert; keep the blank line in the output.
            print()
            continue
        columns = line.split(args.separator)
        if decode_everything:
            columns = [decode(int(column.strip())) for column in columns]
        else:
            for idx in args.encode:
                columns[idx] = str(encode(columns[idx].strip()))
            for idx in args.decode:
                columns[idx] = decode(int(columns[idx].strip()))
        print(args.separator.join(columns))
    return 0
# Run the command-line interface only when executed as a script, using the
# value returned by main() as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment