Skip to content

Instantly share code, notes, and snippets.

@HanaanY
Created August 3, 2018 00:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HanaanY/4ecc4d677dc5fc5fa2cbf36fd8cc1e47 to your computer and use it in GitHub Desktop.
Save HanaanY/4ecc4d677dc5fc5fa2cbf36fd8cc1e47 to your computer and use it in GitHub Desktop.
import csv
import os.path
import itertools
from collections import defaultdict
# Base directory that every file name handled by this script is joined onto.
data_path = '/home/naan/SnapshotSerengeti/data/'
def raw_sampler(rawfile, outfile, n=500):
    """Copy the leading rows of one CSV file into another.

    Both file names are joined onto the module-level ``data_path``
    directory. At most ``n`` rows, counted from the top of ``rawfile``
    (any header line included), are written to ``outfile``; an existing
    ``outfile`` is overwritten.
    """
    source_path = os.path.join(data_path, rawfile)
    target_path = os.path.join(data_path, outfile)
    # newline='' leaves line-ending handling to the csv module.
    with open(source_path, 'r', newline='') as source:
        with open(target_path, 'w', newline='') as target:
            # islice stops after n rows, so the rest of the file is never read.
            leading_rows = itertools.islice(csv.reader(source), n)
            csv.writer(target).writerows(leading_rows)
def resolve_data_path(filename):
    """Return ``filename`` joined onto the module-level ``data_path``."""
    resolved = os.path.join(data_path, filename)
    return resolved
def gather_blank_consensus(referencefile, infile, outfile):
    """Write the rows of ``infile`` whose ID is absent from ``referencefile``.

    All three names are resolved against the data directory through
    ``resolve_data_path``. The first column of each row is treated as its
    ID: every row of ``infile`` whose first column does not occur in the
    first column of ``referencefile`` is copied unchanged to ``outfile``,
    which is overwritten if it already exists.

    Blank rows carry no ID, so they are ignored in both input files.

    NOTE(review): a header row is handled like any other row, so it is
    dropped from the output when both files start with the same
    first-column header -- confirm whether that is intended.
    """
    ref_path = resolve_data_path(referencefile)
    in_path = resolve_data_path(infile)
    out_path = resolve_data_path(outfile)

    # A set gives constant-time membership tests; a list would be re-scanned
    # for every input row.
    with open(ref_path, 'r', newline='') as ref:
        ref_ids = {row[0] for row in csv.reader(ref) if row}

    with open(in_path, 'r', newline='') as inf, \
            open(out_path, 'w', newline='') as outf:
        out_writer = csv.writer(outf)
        for row in csv.reader(inf):
            # `if row` guards against blank lines, for which row[0] would
            # raise IndexError.
            if row and row[0] not in ref_ids:
                # writerow, not writerows: writerows would treat every field
                # of this single row as a separate row of characters.
                out_writer.writerow(row)
# Step 1: write the raw rows whose ID does not appear in the consensus data.
gather_blank_consensus('consensus_data.csv', 'raw_data_for_dryad.csv', 'raw_data_minus_consensus.csv')
# Step 2: take a small sample of that output. The input name must carry the
# '.csv' extension so that it matches the file written in step 1.
raw_sampler('raw_data_minus_consensus.csv', 'raw_data_minus_consensus_sample.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment