Skip to content

Instantly share code, notes, and snippets.

@jRimbault
Last active March 2, 2021 12:20
Show Gist options
  • Save jRimbault/6dd409a92a99e0c4f4b724e2bfcca384 to your computer and use it in GitHub Desktop.
Save jRimbault/6dd409a92a99e0c4f4b724e2bfcca384 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import csv
import os
import sys
from pathlib import Path
def main(args):
    """Scan the CSV files under ``args.directory`` and write a report.

    Every file whose suffix is ``.csv`` is passed to ``collect_errors``; each
    ``(line_number, line)`` it yields becomes one ``filename;line;content``
    row in ``args.output``.

    Args:
        args: namespace with ``directory`` (root of the walk) and ``output``
            (path of the report file to create or overwrite).
    """
    # The report is opened for writing before the lazy walk starts, so when it
    # lives inside the scanned tree it would be picked up as input and its own
    # rows fed back into itself. Remember its real path so it can be skipped.
    report_path = os.path.normcase(os.path.realpath(args.output))

    def is_input_csv(path):
        if path.suffix != ".csv":
            return False
        return os.path.normcase(os.path.realpath(path)) != report_path

    candidates = (
        Path(root, name)
        for root, _, names in os.walk(args.directory)
        for name in names
    )
    csv_walker = (path.absolute() for path in candidates if is_input_csv(path))
    errors = (
        (path, line_number, line)
        for path in csv_walker
        for line_number, line in collect_errors(path)
    )
    # newline="" lets the csv module control the row terminators itself.
    with open(args.output, "w", newline="") as out:
        writer = csv.writer(
            out, dialect="excel", delimiter=";", quoting=csv.QUOTE_NONNUMERIC
        )
        writer.writerow(("filename", "line", "content"))
        writer.writerows(errors)
def collect_errors(path):
    """Yield ``(line_number, content)`` for each data line containing ``?``.

    The first line of the file is treated as a header and never reported.
    Line numbers are 1-based, so the first data line is line 2.

    Args:
        path: path of the file to scan.

    Yields:
        Tuples of the 1-based line number and the line text without its
        line terminator.
    """
    with open(path) as file:
        numbered = enumerate(file, start=1)
        next(numbered, None)  # skips header line (no-op on an empty file)
        for line_number, line in numbered:
            if "?" in line:
                # Strip only the terminator: trailing spaces or tabs belong to
                # the last field and must be reported as they are in the file.
                yield line_number, line.rstrip("\r\n")
def parse_args(argv):
    """Parse the command-line arguments of the scanner.

    Args:
        argv: list of argument strings, without the program name.

    Returns:
        Namespace with ``directory`` (defaults to the current directory) and
        ``output`` (defaults to ``results.csv``).
    """
    parser = argparse.ArgumentParser()
    # A positional only honours ``default`` when it is optional (nargs="?");
    # without it the declared default was never used and the argument was
    # mandatory.
    parser.add_argument(
        "directory", help="directory with the CSVs", default=os.curdir, nargs="?"
    )
    parser.add_argument("output", help="output file", default="results.csv", nargs="?")
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Drop the program name; only the user-supplied arguments are parsed.
    cli_args = parse_args(sys.argv[1:])
    main(cli_args)
#!/usr/bin/env python3
import argparse
import csv
import sys
from collections import defaultdict
from pathlib import Path
def main(args):
    """Apply every correction listed in ``args.fixed_csv``.

    The corrections are grouped per file by ``get_corrections``; each group
    is then handed to ``fix_csv`` together with the file it belongs to.

    Args:
        args: namespace with ``fixed_csv``, the path of the corrections file.
    """
    per_file = get_corrections(args.fixed_csv)
    for target in per_file:
        fix_csv(target, per_file[target])
def get_corrections(path):
    """Load the corrections file and group its rows per target file.

    The file is a ``;``-delimited CSV with a header row naming at least the
    columns ``filename``, ``line`` and ``content``.

    Args:
        path: path of the corrections CSV.

    Returns:
        The mapping built by ``make_corrections_bag`` from the rows.
    """
    # newline="" is what the csv module expects for files it reads.
    with open(path, newline="") as file:
        # Read every field as text. With QUOTE_NONNUMERIC the reader converts
        # each unquoted field to float, so a corrections file re-saved without
        # quotes around its text fields raised ValueError.
        reader = csv.DictReader(file, dialect="excel", delimiter=";")
        return make_corrections_bag(
            # int(float(...)) accepts both "42" and "42.0".
            (row["filename"], int(float(row["line"])), row["content"])
            for row in reader
        )
def make_corrections_bag(iterable):
    """Group ``(file, line_number, line)`` triples by file.

    Args:
        iterable: triples of a file name, a 1-based line number and the
            replacement text for that line.

    Returns:
        A ``defaultdict`` mapping each file name to a dict of
        ``{0-based line index: replacement text}``.
    """
    corrections = defaultdict(dict)
    for entry in iterable:
        filename, one_based, replacement = entry
        per_file = corrections[filename]
        # Stored 0-based so the keys line up with ``enumerate`` over the file.
        per_file[one_based - 1] = replacement
    return corrections
def fix_csv(path, fixes):
    """Write a corrected copy of ``path`` next to it.

    The copy has the same name with the suffix replaced by ``.fixed.csv``.
    Lines whose 0-based index is a key of ``fixes`` are replaced by the
    mapped text; every other line is copied unchanged.

    Args:
        path: path of the file to correct.
        fixes: mapping of 0-based line index to replacement text.
    """
    path = Path(path)
    new_path = path.with_suffix(".fixed.csv")
    print(f"Fixing {path.name} in {new_path.name}")
    # newline="" on both sides keeps the original line terminators intact.
    with open(path, newline="") as file, open(new_path, "w", newline="") as output:
        for i, line in enumerate(file):
            if i not in fixes:
                # Untouched lines are copied verbatim; stripping them would
                # silently drop trailing whitespace from their last field.
                output.write(line)
                continue
            # Reuse the replaced line's own terminator (empty for a final
            # line that has none).
            terminator = line[len(line.rstrip("\r\n")):]
            output.write(fixes[i].rstrip("\r\n") + terminator)
def parse_args(argv):
    """Parse the command-line arguments of the fixer.

    Args:
        argv: list of argument strings, without the program name.

    Returns:
        Namespace with ``fixed_csv``, the path of the corrections file.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("fixed_csv", help="file with the correction")
    namespace = cli.parse_args(argv)
    return namespace
if __name__ == "__main__":
    # Drop the program name; only the user-supplied arguments are parsed.
    cli_args = parse_args(sys.argv[1:])
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment