Skip to content

Instantly share code, notes, and snippets.

@niksmac
Created February 3, 2020 05:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save niksmac/231f93282c99502b35931b6058d95d3b to your computer and use it in GitHub Desktop.
Save niksmac/231f93282c99502b35931b6058d95d3b to your computer and use it in GitHub Desktop.
import pandas as pd
import csv
import math
import argparse
# Command-line interface: two required positional arguments.
parser = argparse.ArgumentParser(
    description='Convert a space-separated table with a P column into CSV files.',
)
parser.add_argument(
    'input_filename',
    help='space-separated input file with CHR, SNP, BP, A1, A2 and P columns',
)
parser.add_argument(
    'file_name',
    help="suffix of the output names: 'clustered-<file_name>' and 'full-<file_name>'",
)
args = parser.parse_args()

# Load the whole table once; the conversion functions below read this
# module-level frame.  skipinitialspace ignores padding that follows a
# separator.
df = pd.read_csv(args.input_filename, skipinitialspace=True, sep=' ')
print("file processed Successfully")
def convert_to_csv_clustered():
    """Write the rows of ``df`` with ``-log10(P) >= 5`` to a CSV file.

    Adds a ``-log`` column holding ``-log10(P)`` to the module-level ``df``,
    selects the rows where that value is at least 5, and writes their CHR,
    ``-log``, SNP, A1, A2 and BP values to ``clustered-{args.file_name}``.
    The second output column is labelled ``P`` although it holds the
    transformed value.

    Raises:
        ValueError: if any ``P`` value is zero or negative, because
            ``math.log10`` rejects it.
    """
    df['-log'] = [-math.log10(p_value) for p_value in df['P']]

    # Filter the frame once instead of re-evaluating the mask per column.
    significant = df.loc[df['-log'] >= 5]
    rows = zip(
        significant['CHR'],
        significant['-log'],
        significant['SNP'],
        significant['A1'],
        significant['A2'],
        significant['BP'],
    )

    # newline='' hands line-ending control to the csv module; without it
    # every row is followed by a blank line on Windows.
    with open(f'clustered-{args.file_name}', 'w', newline='') as out:
        out_csv = csv.writer(out)
        out_csv.writerow(['CHR', 'P', 'SNP', 'A1', 'A2', 'BP'])
        out_csv.writerows(rows)
def convert_to_csv():
    """Write every ``(CHR, -log10(P))`` pair of ``df`` to a CSV file.

    ``-log10(P)`` is rounded to three decimals and written, together with
    CHR, to ``full-{args.file_name}`` under the header ``CHR,P``.  When the
    module-level ``df`` has 50000 rows or more, duplicate pairs are written
    only once.

    Raises:
        ValueError: if any ``P`` value is zero or negative, because
            ``math.log10`` rejects it.
    """
    neg_log_p = [round(-math.log10(p_value), 3) for p_value in df['P']]
    chromosomes = list(df['CHR'])
    pairs = zip(chromosomes, neg_log_p)

    if len(df) >= 50000:
        # Drop repeated pairs for large inputs.  dict.fromkeys keeps the
        # first-seen order, so the output is reproducible between runs
        # (a set would emit the same rows in arbitrary order).
        data = list(dict.fromkeys(pairs))
    else:
        data = list(pairs)

    # newline='' hands line-ending control to the csv module; without it
    # every row is followed by a blank line on Windows.
    with open(f'full-{args.file_name}', 'w', newline='') as out:
        out_csv = csv.writer(out)
        out_csv.writerow(['CHR', 'P'])
        out_csv.writerows(data)
# Produce both outputs from the loaded table: the filtered
# 'clustered-<file_name>' file first, then the 'full-<file_name>' file.
convert_to_csv_clustered()
convert_to_csv()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment