Skip to content

Instantly share code, notes, and snippets.

@raffysommy
Last active May 13, 2021 11:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raffysommy/4bc06cbd59ca582faff4a950e3adf7c9 to your computer and use it in GitHub Desktop.
Save raffysommy/4bc06cbd59ca582faff4a950e3adf7c9 to your computer and use it in GitHub Desktop.
CAIDA AS2Org Python converter
#!/usr/bin/env python3
from datetime import datetime
import pandas as pd
import argparse
import sys
import gzip
import os
def _read_org_table(filename, skiprows, nrows):
    """Read the organisation section of the dump, silently skipping bad rows.

    pandas 1.3 introduced ``on_bad_lines`` and pandas 2.0 removed the older
    ``error_bad_lines``/``warn_bad_lines`` keywords, so the new spelling is
    tried first and the legacy one is used only when pandas rejects it.
    """
    common = dict(delimiter="|", skiprows=skiprows, nrows=nrows)
    try:
        return pd.read_csv(filename, on_bad_lines="skip", **common)
    except TypeError as err:
        # Only fall back when the keyword itself is unknown (pandas < 1.3);
        # any other TypeError is a genuine problem and must propagate.
        if "on_bad_lines" not in str(err):
            raise
        return pd.read_csv(
            filename, error_bad_lines=False, warn_bad_lines=False, **common
        )


def convert(filename):
    """Convert a CAIDA AS2Org dump into a single merged CSV file.

    The input is a gzip-compressed, pipe-delimited text file containing two
    sections, each introduced by a ``# format:`` header line: one describing
    organisations (``# format:org_id``) and one describing AS numbers
    (``# format:aut``). Both sections are loaded, joined on ``org_id``,
    sorted by ``asn`` and written to
    ``converted/year=YYYY/month=MM/day=DD/org2info.csv`` relative to the
    current working directory.

    Args:
        filename: Path to the dump. Its base name must look like
            ``YYYYMMDD.as-org2info.txt.gz``; the date is taken from it.

    Raises:
        ValueError: If the base name does not match the expected pattern.
        IndexError: If either section header is missing from the file.
    """
    date = datetime.strptime(
        os.path.basename(filename), "%Y%m%d.as-org2info.txt.gz"
    )

    # Locate the first occurrence of each section header (0-based line
    # numbers) in a single streaming pass over the decompressed bytes.
    index_org = None
    index_asn = None
    with gzip.open(filename, "rb") as fin:
        for lineno, line in enumerate(fin):
            if index_org is None and b"# format:org_id" in line:
                index_org = lineno
            if index_asn is None and b"# format:aut" in line:
                index_asn = lineno
            if index_org is not None and index_asn is not None:
                break
    if index_org is None or index_asn is None:
        # IndexError is kept as the exception type for backward compatibility.
        raise IndexError(
            "missing '# format:org_id' or '# format:aut' header in " + str(filename)
        )

    # The org header line becomes the CSV header; the rows between the two
    # headers are the organisation records.
    org_df = _read_org_table(
        filename, skiprows=index_org, nrows=index_asn - index_org - 1
    ).rename(columns={"# format:org_id": "org_id"})[["org_id", "org_name", "country"]]

    # Empirical adjustment inherited from the original script: for dumps
    # dated April 2013 or later the AS header is found one line earlier by
    # pandas than by the raw line count.
    # NOTE(review): presumably pandas' quote-aware line skipping merges two
    # physical lines somewhere in the org section of those dumps - confirm
    # against real data before relying on it for new file versions.
    if (date.year, date.month) >= (2013, 4):
        index_asn -= 1

    asn_df = pd.read_csv(filename, delimiter="|", skiprows=index_asn).rename(
        columns={"# format:aut": "asn", "aut_name": "asn_name"}
    )[["asn", "changed", "asn_name", "org_id", "source"]]
    asn_df["asn"] = pd.to_numeric(asn_df["asn"])

    out_dir = date.strftime("converted/year=%Y/month=%m/day=%d")
    os.makedirs(out_dir, exist_ok=True)
    out_path = out_dir + "/org2info.csv"
    asn_df.merge(org_df, on="org_id").sort_values("asn").to_csv(out_path)
    print("Converted data stored in: " + out_path)
def main():
    """Parse the command line and convert the CAIDA dump named on it.

    On invalid arguments the full help text is printed in addition to
    argparse's own error line, and argparse's non-zero exit status is
    preserved so calling shells and scripts can detect the failure.
    """
    parser = argparse.ArgumentParser(description='Convert AS2ORG data provided by CAIDA to parsable CSV')
    parser.add_argument('input', metavar='input', type=str,
                        help='The txt.gz provided by CAIDA')
    try:
        args = parser.parse_args()
    except SystemExit as exc:
        # argparse exits with code 0 for -h/--help (help already printed) and
        # with a non-zero code for usage errors; only the latter needs the
        # extra help output. Re-raise so the original exit status is kept.
        if exc.code not in (0, None):
            parser.print_help()
        raise
    convert(args.input)


# Guard the entry point so importing this module does not parse sys.argv.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment