Created
August 3, 2018 00:16
-
-
Save HanaanY/4ecc4d677dc5fc5fa2cbf36fd8cc1e47 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os.path | |
import itertools | |
from collections import defaultdict | |
# Directory that every CSV file name used in this script is resolved against.
# The original hard-coded location stays the default; set the
# SNAPSHOT_SERENGETI_DATA environment variable to use a different directory.
data_path = os.environ.get('SNAPSHOT_SERENGETI_DATA',
                           '/home/naan/SnapshotSerengeti/data/')
def raw_sampler(rawfile, outfile, n=500):
    """Write the first *n* CSV rows of *rawfile* to *outfile*.

    Both file names are joined onto the module-level ``data_path``.
    If *rawfile* has fewer than *n* rows, all of them are copied.
    """
    source_path = os.path.join(data_path, rawfile)
    target_path = os.path.join(data_path, outfile)
    with open(source_path, 'r', newline='') as source:
        with open(target_path, 'w', newline='') as target:
            # islice stops after n rows, so only the head of the file is read.
            head_rows = itertools.islice(csv.reader(source), n)
            csv.writer(target).writerows(head_rows)
def resolve_data_path(filename):
    """Return *filename* joined onto the module-level ``data_path``."""
    full_path = os.path.join(data_path, filename)
    return full_path
def gather_blank_consensus(referencefile, infile, outfile):
    """Copy the rows of *infile* whose first column is not in *referencefile*.

    All three file names are resolved with ``resolve_data_path``.  The first
    column of every row in *referencefile* is collected as the set of IDs to
    exclude; every row of *infile* whose first column is not in that set is
    written unchanged to *outfile*.  Empty rows (blank lines) in either input
    are skipped.
    """
    ref_path = resolve_data_path(referencefile)
    in_path = resolve_data_path(infile)
    out_path = resolve_data_path(outfile)

    # A set keeps the per-row membership test O(1) instead of scanning a list.
    with open(ref_path, 'r', newline='') as ref:
        ref_ids = {row[0] for row in csv.reader(ref) if row}

    with open(in_path, 'r', newline='') as inf, \
            open(out_path, 'w', newline='') as outf:
        out_writer = csv.writer(outf)
        for row in csv.reader(inf):
            # csv.reader yields [] for blank lines; row[0] would raise there.
            if not row or row[0] in ref_ids:
                continue
            # writerow, not writerows: writerows(row) would treat each field
            # as its own row and split it into single-character columns.
            out_writer.writerow(row)
# Step 1: write the raw rows that have no entry in the consensus file.
gather_blank_consensus('consensus_data.csv', 'raw_data_for_dryad.csv', 'raw_data_minus_consensus.csv')
# Step 2: sample the file written in step 1.  The name needs its '.csv'
# extension to match that file; without it the open() call cannot find it.
raw_sampler('raw_data_minus_consensus.csv', 'raw_data_minus_consensus_sample.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment