Skip to content

Instantly share code, notes, and snippets.

@impredicative
Last active September 28, 2017 14:40
Show Gist options
  • Save impredicative/57521fd21f4c3a5c0805c05bf9d54086 to your computer and use it in GitHub Desktop.
Save impredicative/57521fd21f4c3a5c0805c05bf9d54086 to your computer and use it in GitHub Desktop.
pandas_util
import io
import re
import pandas as pd
def _prepare_pipe_separated_str(str_input):
    """Normalize a hand-formatted pipe table into compact pipe-delimited text.

    Whitespace (spaces and tabs) is removed from the start and end of every
    line and from around every ``|``. If every non-blank row both starts and
    ends with ``|``, those outer delimiters are treated as decoration and
    removed, so they do not turn into empty leading/trailing columns.

    Args:
        str_input: The table text, one row per line.

    Returns:
        The cleaned text, with ``\\n`` line endings, ready to be parsed with
        ``|`` as the separator.
    """
    # Normalize CRLF / CR line endings first: a stray '\r' would sit between
    # a trailing '|' and the '$' anchor and prevent the pipe from being removed.
    str_input = str_input.replace('\r\n', '\n').replace('\r', '\n')

    substitutions = [
        (r'^[ \t]+', ''),  # Remove leading whitespace
        (r'[ \t]+$', ''),  # Remove trailing whitespace
        (r'[ \t]*\|[ \t]*', '|'),  # Remove whitespace around column delimiters
    ]

    # Blank rows carry no delimiters, so they must not veto the detection of
    # outer pipes on the rows that actually hold data.
    rows = [line.strip() for line in str_input.split('\n')]
    rows = [row for row in rows if row]
    if rows and all(row.startswith('|') and row.endswith('|') for row in rows):
        substitutions.extend([
            (r'^\|', ''),  # Remove redundant leading delimiter
            (r'\|$', ''),  # Remove redundant trailing delimiter
        ])

    # Order matters: edge whitespace must be gone before the outer-pipe
    # patterns run, because they are anchored to the line boundaries.
    for pattern, replacement in substitutions:
        str_input = re.sub(pattern, replacement, str_input, flags=re.MULTILINE)
    return str_input
def read_pipe_separated_str(str_input):
    """Parse a pipe-separated table held in a string into a pandas DataFrame.

    The text is first cleaned by ``_prepare_pipe_separated_str`` and then
    handed to ``pandas.read_csv`` with ``|`` as the separator, so the first
    row supplies the column names.

    Example input:

        | odcd_wacs | cs_wacs | automation_eligible |
        |           |         | True                |
        |           | 0       | False               |
        |           | 576     | True                |
        | 300       | 600     | True                |

    The leading and trailing pipes are optional, but if one is present, so
    must be the other.

    Tip: in PyCharm, the "Pipe Table Formatter" plugin has a "Format" feature
    that can be used to neatly format such a table.
    """
    cleaned_text = _prepare_pipe_separated_str(str_input)
    text_buffer = io.StringIO(cleaned_text)
    return pd.read_csv(text_buffer, sep='|')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment