Skip to content

Instantly share code, notes, and snippets.

@ygivenx
Created April 15, 2019 21:54
Show Gist options
  • Save ygivenx/fe249d1b39f90a509021661e2b679399 to your computer and use it in GitHub Desktop.
Save ygivenx/fe249d1b39f90a509021661e2b679399 to your computer and use it in GitHub Desktop.
Parse vertical data
Asia
Instance REGION-1:
ABCD_RV
Interface: fastethernet01/01
Last state change: 0h54m44s ago
Sysid: 01441
Speaks: IPv4
Topologies:
ipv4-unicast
SAPA: point-to-point
Area Address(es):
441
IPv4 Address(es):
1.1.1.1
EFGH_RV
Interface: fastethernet01/01
Last state change: 0h54m44s ago
Sysid: 01442
Speaks: IPv4
Topologies:
ipv4-unicast
SAPA: point-to-point
Area Address(es):
442
IPv4 Address(es):
1.1.1.2
EU
Instance REGION-2:
IJKL_RV
Interface: fastethernet01/01
Last state change: 0h54m44s ago
Sysid: 01443
Speaks: IPv4
Topologies:
ipv4-unicast
SAPA: point-to-point
Area Address(es):
443
IPv4 Address(es):
1.1.1.3
import re
import pandas as pd
from collections import defaultdict
def parse_text(data_blocks):
    """
    Generate a dict which could be converted easily to a pandas dataframe.

    Every innermost entry of ``data_blocks`` yields one row: its three nesting
    keys are appended to the ``area``, ``instance`` and ``sub_instance``
    columns, and each ``;``-separated ``key: value`` item of its text is
    appended to the column named by ``key``.

    :param data_blocks: three-level mapping (area -> instance -> sub_instance)
        whose leaves are strings of ``key: value`` items separated by ``;``
    :return: defaultdict(list) with row values for every column; the columns
        only have equal lengths when every record carries the same keys
    :raises ValueError: if a non-blank item contains no ``:`` separator
    """
    final_data = defaultdict(list)
    for area, instances in data_blocks.items():
        for instance, sub_instances in instances.items():
            for sub_instance, block in sub_instances.items():
                final_data['area'].append(area)
                final_data['instance'].append(instance)
                final_data['sub_instance'].append(sub_instance)
                for item in block.split(";"):
                    # The trailing ";" of a block leaves an empty last item.
                    if not item or item.isspace():
                        continue
                    # Split on the first colon only, so values that contain
                    # colons themselves (IPv6 addresses, clock times) stay
                    # intact instead of breaking the two-way unpack.
                    parts = re.split(r':\s*', item, maxsplit=1)
                    if len(parts) != 2:
                        raise ValueError(
                            "expected a 'key: value' item, got %r" % (item,)
                        )
                    key, value = parts
                    final_data[key].append(value)
    return final_data
def nested_dict(depth=3):
    """
    Build an auto-vivifying dictionary nested ``depth`` levels deep.

    :param depth: number of key levels; any value below 2 gives a single level
    :return: a defaultdict whose innermost level defaults to an empty string
        and whose outer levels default to a nested dict one level shallower
    """
    # Innermost level: missing keys start as "" so callers can use ``+=``.
    if depth < 2:
        return defaultdict(str)

    def make_child():
        return nested_dict(depth - 1)

    return defaultdict(make_child)
def main(path='stack_formatting.txt', regions=("Asia", "EU")):
    """
    Read a vertically formatted report and print it as a pandas dataframe.

    The file is scanned line by line. A line equal to one of ``regions``
    starts a new area, a line matching ``instance region...`` starts a new
    instance, and a line containing ``_RV`` names the device that the
    following detail lines belong to. Detail lines are concatenated into one
    ``;``-separated string per device, which ``parse_text`` then splits into
    columns.

    :param path: text file to read
    :param regions: collection of exact line values that mark a new area
    :return: None; the resulting dataframe is printed
    """
    _level_1 = re.compile(r'instance region.*', re.IGNORECASE)

    # Format data so that it could be split easily.
    data_blocks = nested_dict()
    top_header = None
    header = None
    instance = None

    with open(path) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines carry no data; appending them would create
                # empty items (or a whole bogus record before any header).
                continue
            if line in regions:
                top_header = line
                # Following details must not stick to the previous device.
                instance = None
            elif _level_1.match(line):
                header = line
                instance = None
            elif "_RV" in line:
                instance = line
            elif instance is None:
                # A detail line with no device to attach to: skip it rather
                # than file it under a None key and produce a spurious row.
                continue
            elif line.endswith(":"):
                # A label whose value sits on the next line: no ";" yet, so
                # the next line completes the same "key:value" item.
                data_blocks[top_header][header][instance] += line
            else:
                data_blocks[top_header][header][instance] += line + ";"

    print(pd.DataFrame(parse_text(data_blocks)))
# Run the parser only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment