Last active
August 5, 2016 23:20
-
-
Save whilei/bffa208b82889f87da979e723c1bb195 to your computer and use it in GitHub Desktop.
give me real data...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import re | |
import os | |
import csv | |
# Greet the user, explain what the script does, and ask for both file paths.
# Single-argument parenthesized prints produce the same output under the
# Python 2 print statement, which this script targets (see raw_input below).
print("Welcome to the DIGITIZER!")
print("=" * 40)
print("I'll print all rows containing digits from one CSV into another csv file.")
print("-" * 40)
print("--> File paths should be relative and include .csv extension.\n--> I'll make the output file if it doesn't exist yet; if it does, I'll append to it.")
input_file_path = raw_input('Input file: ')
output_file_path = raw_input('Output file: ')

# Make sure the output file exists before any rows are processed.
# Append mode creates the file when it is missing and leaves existing content
# alone, so there is no window in which a check-then-create sequence could
# truncate a file that appeared in between.
open(output_file_path, 'a').close()
# Pattern that matches a single digit character. Written as a raw string so
# the backslash is passed to the regex engine verbatim.
_digits = re.compile(r'\d')


def contains_digits(list):
    """Return True if at least one string in the given row contains a digit.

    ``list`` is an iterable of strings, such as a row yielded by
    ``csv.reader`` (https://docs.python.org/2/library/csv.html). The
    parameter name shadows the builtin; it is kept unchanged so the
    function's signature stays the same for existing callers.

    Returns False when the row is empty or no string contains a digit.
    """
    # A successful search returns a match object (truthy); a miss returns None.
    return any(_digits.search(s) for s in list)
# Copy every input row that contains a digit to the output CSV, echoing each
# row and the decision ('yep' / 'nope') to the console as it goes.
# 'rU' works instead of 'rb' given 'universal newline' error on Mac.
# The output file is opened once, in append mode, for the whole run instead
# of being reopened for every matching row.
with open(input_file_path, 'rU') as f, open(output_file_path, 'ab') as out:
    writer = csv.writer(out)
    for row in csv.reader(f):
        print(row)
        if contains_digits(row):
            print('yep')
            writer.writerow(row)
        else:
            print('nope')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment