Skip to content

Instantly share code, notes, and snippets.

@mzpqnxow
Created November 18, 2017 02:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mzpqnxow/29b971610fa8d30169afd9f3016135c4 to your computer and use it in GitHub Desktop.
Save mzpqnxow/29b971610fa8d30169afd9f3016135c4 to your computer and use it in GitHub Desktop.
#
# Useful function for outputting to structured files without any work - AG
#
# True on Python 2, where ``str`` is the byte-string type.
_PY2 = str is bytes
# Types accepted as "a string" by the line-oriented writers.
_TEXT_TYPES = (str, unicode) if _PY2 else (str,)  # noqa: F821


def _native_text(value):
    """Return *value* UTF-8 encoded if it is a Python 2 ``unicode`` object.

    Every other value (and every value on Python 3) is returned unchanged,
    so non-string cells such as ints or None never hit ``.encode``.
    """
    if _PY2 and isinstance(value, unicode):  # noqa: F821
        return value.encode('utf-8')
    return value


def _dump_csv(stream, rows, csv_fields):
    """Write *rows* to *stream* as CSV, with a header when fields are known."""
    if isinstance(rows, (set, tuple, list)) is False:
        raise RuntimeError(
            'ERROR: csv files must be generated from a list/tuple/set')
    # Peek at the first row without indexing so that sets work too.
    first = next(iter(rows), None)
    if isinstance(first, dict):
        # Dict rows define the header themselves; this takes precedence
        # over a caller-supplied csv_fields.
        csv_fields = [_native_text(key) for key in first]
    if csv_fields is None:
        writer = CSVWriter(stream)
    else:
        writer = CSVDictWriter(stream, fieldnames=csv_fields)
        writer.writeheader()
    for row in rows:
        if row is None:
            continue
        if csv_fields is None:
            writer.writerow(row)
        elif isinstance(row, dict):
            writer.writerow(
                {_native_text(k): _native_text(v) for k, v in row.items()})
        else:
            # Positional row: pair the values with the header names.
            try:
                named = dict(zip(csv_fields, row))
            except TypeError:
                raise RuntimeError('unknown type for row')
            writer.writerow(named)


def _dump_lst(stream, rows, uniq, filter_blanks):
    """Write *rows* to *stream* one string per line."""
    if isinstance(rows, (set, tuple, list)) is False:
        raise RuntimeError('ERROR: raw/.lst dump object must be set/tuple/list')
    # De-duplicate while writing so the input order is kept.
    seen = set()
    for row in rows:
        if not isinstance(row, _TEXT_TYPES):
            raise RuntimeError(
                'ERROR: raw/.lst files must be list of strings')
        if filter_blanks is True and row.strip() == '':
            continue
        if uniq is True:
            if row in seen:
                continue
            seen.add(row)
        stream.write(_native_text(row + '\n'))


def to_file(dest, obj, csv_fields=None, uniq=True, filter_blanks=True,
            silent=False):
    """
    Dump ``obj`` to the file ``dest``, choosing the format from the extension.

    If .json, do a standard dump() to the file
    If .csv, do a CSV with column headers
    If .lst, do one str/unicode per-line
    Any other extension prints a warning and writes one stripped string
    per line.

    Arguments:
      dest          -- destination path (a string); the file is overwritten.
      obj           -- data to write. Must be a list/tuple/set for .csv and
                       .lst; rows equal to None are skipped for .csv.
      csv_fields    -- header names for .csv when rows are sequences. When
                       the first row is a dict its keys are used instead.
                       With no fields and no dict rows, no header is written.
      uniq          -- .lst only: when True, write each distinct line once
                       (first occurrence wins).
      filter_blanks -- .lst only: when True, skip whitespace-only lines.
      silent        -- when False, print a confirmation after writing.

    Raises RuntimeError for an unsupported container or row type, and
    re-raises the IOError/OSError from open() after printing it.

    NOTE: json_dump, CSVDictWriter and CSVWriter are module-level names
    defined outside this function -- presumably json.dump, csv.DictWriter
    and csv.writer; confirm against the file's imports.
    """
    try:
        if _PY2:
            # Python 2's csv and json writers emit byte strings.
            write_stream = open(dest, 'wb')
        else:
            # newline='' lets the csv writer control line endings itself
            # and, like the old binary mode, keeps '\n' untranslated.
            write_stream = open(dest, 'w', encoding='utf-8', newline='')
    except (IOError, OSError) as err:
        print(err)
        raise
    # The with-block closes the file even when a branch below raises.
    with write_stream:
        if dest.endswith('.json'):
            # Basic JSON dump
            json_dump(obj, write_stream, sort_keys=False)
        elif dest.endswith('.csv'):
            _dump_csv(write_stream, obj, csv_fields)
        elif dest.endswith('.lst'):
            _dump_lst(write_stream, obj, uniq, filter_blanks)
        else:
            # Unknown extension, assume list of strings
            print('WARN: unknown file extension, dumping as list of strings')
            for row in obj:
                if not isinstance(row, _TEXT_TYPES):
                    raise RuntimeError(
                        'ERROR: lst files must be list of strings')
                write_stream.write(_native_text(row.strip() + '\n'))
    if silent is False:
        print('--- Object dumped to file %s ...' % (dest))
@mzpqnxow
Copy link
Author

This is buggy and not very Unicode-friendly; don't use it, sorry.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment