Created
April 15, 2019 21:54
-
-
Save ygivenx/fe249d1b39f90a509021661e2b679399 to your computer and use it in GitHub Desktop.
Parse vertical data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Asia | |
Instance REGION-1: | |
ABCD_RV | |
Interface: fastethernet01/01 | |
Last state change: 0h54m44s ago | |
Sysid: 01441 | |
Speaks: IPv4 | |
Topologies: | |
ipv4-unicast | |
SAPA: point-to-point | |
Area Address(es): | |
441 | |
IPv4 Address(es): | |
1.1.1.1 | |
EFGH_RV | |
Interface: fastethernet01/01 | |
Last state change: 0h54m44s ago | |
Sysid: 01442 | |
Speaks: IPv4 | |
Topologies: | |
ipv4-unicast | |
SAPA: point-to-point | |
Area Address(es): | |
442 | |
IPv4 Address(es): | |
1.1.1.2 | |
EU | |
Instance REGION-2: | |
IJKL_RV | |
Interface: fastethernet01/01 | |
Last state change: 0h54m44s ago | |
Sysid: 01443 | |
Speaks: IPv4 | |
Topologies: | |
ipv4-unicast | |
SAPA: point-to-point | |
Area Address(es): | |
443 | |
IPv4 Address(es): | |
1.1.1.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import pandas as pd | |
from collections import defaultdict | |
def parse_text(data_blocks):
    """
    Flatten the nested block mapping into column lists for a pandas dataframe.

    Every innermost entry becomes one row. Its text is cut at ";" into
    "name: value" fields. Only the first colon separates the name from the
    value, so values that contain colons themselves (for example a time such
    as 12:30:05) are kept intact. Columns that a row does not supply are
    filled with None so that every column ends up with one cell per row.

    :param data_blocks: mapping area -> instance -> sub_instance -> text,
        where text is a ";"-separated run of "name: value" fields
    :return: defaultdict(list) holding the row values for every column
    :raises ValueError: if a non-blank field contains no colon
    """
    final_data = defaultdict(list)
    row_count = 0
    for area, instances in data_blocks.items():
        for instance, sub_instances in instances.items():
            for sub_instance, text in sub_instances.items():
                fields = [
                    ('area', area),
                    ('instance', instance),
                    ('sub_instance', sub_instance),
                ]
                for item in text.split(";"):
                    # The trailing ";" of a block yields an empty last piece.
                    if not item.strip():
                        continue
                    # maxsplit=1: later colons belong to the value.
                    parts = re.split(r':\s*', item, maxsplit=1)
                    if len(parts) != 2:
                        raise ValueError(
                            "expected a 'name: value' field but got {!r}".format(item)
                        )
                    fields.append((parts[0], parts[1]))

                for name, value in fields:
                    column = final_data[name]
                    # Backfill rows that were completed before this column
                    # first appeared (a negative count extends by nothing).
                    column.extend([None] * (row_count - len(column)))
                    column.append(value)
                row_count += 1

                # Pad the columns this row did not supply.
                for column in final_data.values():
                    column.extend([None] * (row_count - len(column)))
    return final_data
def nested_dict(depth=3):
    """
    Build a self-populating mapping that is ``depth`` levels deep.

    Looking up a missing key creates the next level on the fly. The innermost
    level (reached when depth is below 2) is a defaultdict(str), so its
    missing keys start out as the empty string.

    :param depth: number of nested mapping levels
    :return: defaultdict whose missing values are the next level down
    """
    if depth < 2:
        return defaultdict(str)

    def make_child():
        # Each missing key gets its own, one-level-shallower mapping.
        return nested_dict(depth - 1)

    return defaultdict(make_child)
def main():
    """
    Read 'stack_formatting.txt', group its lines into nested blocks and print
    the result as a pandas dataframe.

    Lines are classified after stripping surrounding whitespace:
    "Asia" / "EU" select the top-level key, lines matching
    "instance region..." (case-insensitive) select the second-level key, and
    lines containing "_RV" select the third-level key. Every other line is
    appended to the text stored under the current three keys.
    """
    region_pattern = re.compile(r'instance region.*', re.IGNORECASE)
    with open('stack_formatting.txt') as f:
        lines = f.readlines()

    # Format data so that it could be split easily
    blocks = nested_dict()
    area = None
    region = None
    device = None
    for raw_line in lines:
        text = raw_line.strip()
        if text in ("Asia", "EU"):
            area = text
            continue
        if region_pattern.match(text):
            region = text
            continue
        if "_RV" in text:
            device = text
            continue
        # A line ending in ":" is a field name whose value follows on the next
        # line, so it gets no ";" terminator and the two are glued together.
        terminator = "" if text.endswith(":") else ";"
        blocks[area][region][device] += text + terminator
    print(pd.DataFrame(parse_text(blocks)))
# Run the parser only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment