Created
November 18, 2017 02:41
-
-
Save mzpqnxow/29b971610fa8d30169afd9f3016135c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Useful function for outputting to structured files without any work - AG
#
def to_file(dest, obj, csv_fields=None, uniq=True, filter_blanks=True, silent=False):
    """
    Dump ``obj`` to the file ``dest``, choosing the format from the extension.

    If .json, do a standard dump() to the file
    If .csv, do a CSV, with column headers when field names are known
    If .lst, do one string per line
    Any other extension is treated as a list of strings: each one is
    stripped and written on its own line after a warning is printed.

    Args:
        dest: Path of the file to write; its suffix selects the format.
        obj: Data to write. ``.csv`` and ``.lst`` require a list, tuple or
            set; ``.json`` accepts whatever ``json_dump`` can serialize.
        csv_fields: Column names for ``.csv`` output. When omitted and the
            first row is a dict, that dict's keys are used. When neither
            applies, a plain header-less CSV is written.
        uniq: For ``.lst`` only, write each distinct line once, keeping the
            first occurrence so the output order is deterministic.
        filter_blanks: For ``.lst`` only, skip whitespace-only lines.
        silent: Suppress the final confirmation message.

    Raises:
        RuntimeError: ``obj`` is not a list/tuple/set for ``.csv``/``.lst``,
            a line is not a string, or a CSV row has an unsupported type.
        IOError/OSError: ``dest`` cannot be opened (the error is printed
            first, then re-raised).
    """
    containers = (set, tuple, list)
    is_csv = dest.endswith('.csv')
    is_lst = dest.endswith('.lst')

    # Validate before opening so that a bad call does not truncate an
    # existing file.
    if is_csv and not isinstance(obj, containers):
        raise RuntimeError(
            'ERROR: csv files must be generated from a list/tuple/set')
    if is_lst and not isinstance(obj, containers):
        raise RuntimeError('ERROR: raw/.lst dump object must be set/tuple/list')

    try:
        write_stream = _open_for_dump(dest)
    except (IOError, OSError) as err:  # IOError on Python 2, OSError on 3
        print(err)
        raise

    # The context manager closes the file even when a writer raises.
    with write_stream:
        if dest.endswith('.json'):
            # Basic JSON dump
            json_dump(obj, write_stream, sort_keys=False)
        elif is_csv:
            _dump_csv(write_stream, obj, csv_fields)
        elif is_lst:
            _dump_lst(write_stream, obj, uniq, filter_blanks)
        else:
            # Unknown extension, assume list of strings
            print('WARN: unknown file extension, dumping as list of strings')
            for row in obj:
                if not _is_text(row):
                    raise RuntimeError(
                        'ERROR: lst files must be list of strings')
                write_stream.write(_utf8(row.strip()) + '\n')

    if not silent:
        print('--- Object dumped to file %s ...' % (dest))


def _open_for_dump(dest):
    """Open ``dest`` for writing in the mode the running interpreter needs."""
    if str is bytes:
        # Python 2: the csv and json writers emit byte strings.
        return open(dest, 'wb')
    # Python 3: the writers emit text. newline='' keeps the csv module's own
    # line terminators from being translated a second time.
    return open(dest, 'w', encoding='utf-8', newline='')


def _is_text(value):
    """Return True when ``value`` is a text string on this interpreter."""
    if str is bytes:  # Python 2: accept both str and unicode
        return isinstance(value, basestring)  # noqa: F821
    return isinstance(value, str)


def _utf8(value):
    """Encode Python 2 ``unicode`` as UTF-8 bytes; return anything else as-is."""
    if str is bytes and isinstance(value, unicode):  # noqa: F821
        return value.encode('utf-8')
    return value


def _dump_csv(write_stream, rows, csv_fields):
    """Write ``rows`` as CSV, with a header row when field names are known."""
    if csv_fields is None:
        # Peek through iter() rather than rows[0] so that sets work too.
        first = next(iter(rows), None)
        if isinstance(first, dict):
            csv_fields = first.keys()

    if csv_fields is None:
        writer = CSVWriter(write_stream)
        for row in rows:
            if row is None:
                continue
            if isinstance(row, (list, tuple)):
                row = [_utf8(cell) for cell in row]
            writer.writerow(row)
        return

    csv_fields = [_utf8(name) for name in csv_fields]
    writer = CSVDictWriter(write_stream, fieldnames=csv_fields)
    writer.writeheader()
    for row in rows:
        if row is None:
            continue
        if isinstance(row, dict):
            # Only text is encoded; numbers and other cell values pass
            # through untouched for the csv module to stringify.
            record = {_utf8(k): _utf8(v) for k, v in row.items()}
        elif isinstance(row, (list, tuple)):
            record = dict(zip(csv_fields, [_utf8(cell) for cell in row]))
        else:
            raise RuntimeError('unknown type for row')
        writer.writerow(record)


def _dump_lst(write_stream, rows, uniq, filter_blanks):
    """Write one string per line, optionally de-duplicated and blank-free."""
    seen = set()
    for row in rows:
        if not _is_text(row):
            raise RuntimeError(
                'ERROR: raw/.lst files must be list of strings')
        if filter_blanks and row.strip() == '':
            continue
        if uniq:
            if row in seen:
                continue
            seen.add(row)
        write_stream.write(_utf8(row) + '\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is buggy and not very Unicode-friendly; don't use it, sorry.