Skip to content

Instantly share code, notes, and snippets.

@emileaben
Last active May 22, 2016 17:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save emileaben/0d832b78e00d2355bac520afb9603b82 to your computer and use it in GitHub Desktop.
Save emileaben/0d832b78e00d2355bac520afb9603b82 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
'''
Prototype tool for bulk IP->AS lookups for a specific date
Downloads RIS data locally (using CAIDA BGPSTREAM)
expects IP addresses/prefixes on STDIN (IPv4 and IPv6)
takes a single command-line argument which is the DATE for which to download the table (any format convertible by arrow will do)
example use:
cat file_with_maaaaany_ips | ./bulk-ris-lookup.py 2009-03-22
'''
import arrow
import time
import sys
from radix import Radix
from _pybgpstream import BGPStream, BGPRecord, BGPElem
# The snapshot date is the script's only argument; fail with a usage hint
# instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s DATE\n" % sys.argv[0])
    sys.exit(1)
input_time = sys.argv[1]

# BGPStream handle, plus the record object that is passed to
# stream.get_next_record() further down.
stream = BGPStream()
rec = BGPRecord()

###
# Epoch seconds for the requested date (any format arrow.get() can parse).
timestamp = arrow.get(input_time).timestamp
# arrow < 1.0 exposes `timestamp` as a property, arrow >= 1.0 as a method;
# accept both so the arithmetic below always sees a number.
if callable(timestamp):
    timestamp = timestamp()
timestamp = int(timestamp)

# fudge by 12 minutes to allow time for the RIB dumps to be created
# `now` is truncated to whole seconds: the (possibly adjusted) timestamp is
# later handed to stream.add_interval_filter(), whose bounds are integer
# epoch seconds, so it must never turn into a float here.
now = int(time.time())
fudge_secs = 12 * 60
if now - timestamp < fudge_secs:
    old_timestamp = timestamp
    timestamp = now - fudge_secs
    sys.stderr.write(
        "timestamp to close to realtime, adjusted by %s\n"
        % (timestamp - old_timestamp)
    )
# Restrict the stream to RIB dumps from the RIS project. Only RIS is
# processed because of its 8-hour dump cycle (Route Views is different).
for filter_type, filter_value in (('project', 'ris'), ('record-type', 'ribs')):
    stream.add_filter(filter_type, filter_value)
# test: uncomment to limit the run to a single collector
#stream.add_filter('collector','rrc11')

# Ask for the 8 hours that end at the requested timestamp.
rib_window_secs = 8 * 3600
stream.add_interval_filter(timestamp - rib_window_secs, timestamp)

# start the stream
stream.start()
sys.stderr.write("start loading BGP data" + "\n")
load_started = time.time()

# Radix tree keyed by prefix; every node's data['origin'] is the set of
# origin ASes collected for exactly that prefix.
rtree = Radix()
while stream.get_next_record(rec):
    entry = rec.get_next_elem()
    while entry:
        as_path = entry.fields['as-path']
        prefix = entry.fields['prefix']
        # The origin is the last entry of the space-separated AS path.
        origin_as = as_path.split(" ")[-1]
        node = rtree.search_exact(prefix)
        if not node:
            # First time this exact prefix is seen: create its node.
            node = rtree.add(prefix)
            node.data['origin'] = set()
        node.data['origin'].add(origin_as)
        entry = rec.get_next_elem()

load_seconds = time.time() - load_started
sys.stderr.write(
    ("finished loading BGP data (in %s s)" % load_seconds) + "\n"
)
# Answer every line read from STDIN with "<line>\t<origin(s)>".
for line in sys.stdin:
    line = line.rstrip('\n')
    # '-' is reported when the lookup finds no node or the input is unusable.
    origin = '-'
    # Look up without surrounding whitespace (e.g. the '\r' of CRLF input),
    # but echo the line exactly as it was read.
    query = line.strip()
    if query:
        try:
            rnode = rtree.search_best(query)
        except Exception:
            # Best effort: input the tree cannot parse is reported as '-'.
            # Unlike a bare `except:`, this still lets KeyboardInterrupt and
            # SystemExit through, so the run can be interrupted.
            rnode = None
        if rnode:
            # account for multi-origin; sorted so the output is reproducible
            origin = '|'.join(sorted(rnode.data['origin']))
    sys.stdout.write("{line}\t{origin}\n".format(line=line, origin=origin))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment