Skip to content

Instantly share code, notes, and snippets.

@SteelPangolin
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SteelPangolin/ce4a0644e15a8f95ce39 to your computer and use it in GitHub Desktop.
Save SteelPangolin/ce4a0644e15a8f95ce39 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Translate a CSV document with a header to a YAML document that is a list of maps.
Uses constant memory and keeps field names in the original order.
"""
import sys
import csv
import yaml # requires PyYAML
def read_csv(input):
    """
    Lazily read a CSV stream whose first row is a header.

    Every following row is paired with the header fields in column order.
    Pairing uses ``zip``, so a row that is shorter or longer than the header
    is cut to the shorter of the two. A stream with no header row at all
    yields nothing.

    :param input: stream
    :return: iterator yielding, for each data row, an iterable of
        (field name, field value) pairs
    """
    csv_reader = csv.reader(input)
    # A bare next() on an exhausted reader raises StopIteration inside this
    # generator, which Python 3.7+ reports as RuntimeError (PEP 479).
    fields = next(csv_reader, None)
    if fields is None:
        return
    for row in csv_reader:
        yield zip(fields, row)
def write_yaml(pairs, output):
    """
    Write rows to ``output`` as one YAML document: a sequence of mappings.

    Events are handed to the dumper one at a time while ``pairs`` is being
    iterated, so rows are never collected into a list here.

    :param pairs: iterable of rows, each row an iterable of
        (field name, field value) pairs
    :param output: stream
    """
    def scalar(text):
        # Untagged scalar; both implicit flags set, as for every key and value.
        return yaml.ScalarEvent(anchor=None, tag=None, implicit=(True, True), value=text)

    def events():
        yield yaml.DocumentStartEvent()
        yield yaml.SequenceStartEvent(anchor=None, tag=None, implicit=True)
        for record in pairs:
            # One mapping per row, keys emitted in the order the row supplies them.
            yield yaml.MappingStartEvent(anchor=None, tag=None, implicit=True)
            for key, cell in record:
                yield scalar(key)
                yield scalar(cell)
            yield yaml.MappingEndEvent()
        yield yaml.SequenceEndEvent()
        yield yaml.DocumentEndEvent()

    dumper = yaml.Dumper(output)
    dumper.open()
    for event in events():
        dumper.emit(event)
    dumper.close()
def main(input, output):
    """
    Convert CSV read from ``input`` into YAML written to ``output``.

    :param input: stream handed to ``read_csv``
    :param output: stream handed to ``write_yaml``
    """
    rows = read_csv(input)
    write_yaml(rows, output)
# Script entry point: read CSV from standard input, write YAML to standard output.
if __name__ == "__main__":
    source, sink = sys.stdin, sys.stdout
    main(source, sink)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment