emileaben/bulk-ris-lookup.py

## bulk-ris-lookup.py
#!/usr/bin/env python

'''
Prototype tool for bulk IP->AS lookups for a specific date
Downloads RIS data locally (using CAIDA BGPSTREAM)
expects IP addresses/prefixes on STDIN (IPv4 and IPv6)
takes a single command-line argument which is the DATE for which to download the table (any format convertible by arrow will do)
example use:
cat file_with_maaaaany_ips | ./bulk-ris-lookup.py 2009-03-22
'''

import arrow
import time
import sys
from radix import Radix
from _pybgpstream import BGPStream, BGPRecord, BGPElem
# Objects shared by the rest of the script: the BGPStream that is configured
# and started further down, and the record object it fills on every read.
stream = BGPStream()
rec = BGPRecord()

# The snapshot date is the one required command-line argument.  Without it
# there is nothing to download, so exit with a usage hint instead of an
# IndexError traceback.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s DATE < file_with_ips\n" % sys.argv[0])
    sys.exit(2)
input_time = sys.argv[1]

# arrow < 1.0 exposes .timestamp as a property, arrow >= 1.0 as a method;
# accept both so that an arrow upgrade does not break the arithmetic below.
timestamp = arrow.get( input_time ).timestamp
if callable(timestamp):
    timestamp = timestamp()
# whole epoch seconds, whichever arrow flavour produced the value
timestamp = int(timestamp)

# fudge by 12 minutes to allow for the creation of RIB dumps: a requested time
# that is less than 12 minutes in the past (or in the future) is moved back
# to now - 12 minutes
now = time.time()
fudge_secs = 12*60
if now - timestamp < fudge_secs:
    old_timestamp = timestamp
    # time.time() is a float; truncate so the timestamp stays an integer
    timestamp = int(now - fudge_secs)
    sys.stderr.write("timestamp to close to realtime, adjusted by %s\n" % ( timestamp - old_timestamp ))

# Only the RIS project is queried, because of its 8-hourly dumps (rv is
# different); only RIB records are requested.
for filter_name, filter_value in (
        ('project', 'ris'),
        ('record-type', 'ribs')):
    stream.add_filter(filter_name, filter_value)
# for a quick test, additionally restrict the stream to a single collector:
#stream.add_filter('collector','rrc11')

# the interval covers the 8 hours leading up to the requested time
window_end = timestamp
window_start = window_end - 8*3600
stream.add_interval_filter(window_start, window_end)

# start the stream
stream.start()
sys.stderr.write("start loading BGP data\n")

load_started = time.time()

# Radix tree keyed by prefix; each node stores the set of origin ASes seen
# for that prefix under node.data['origin'].
rtree = Radix()

while stream.get_next_record(rec):
    elem = rec.get_next_elem()
    while elem:
        fields = elem.fields
        as_path = fields['as-path']
        prefix = fields['prefix']
        # the origin AS is the last entry of the space-separated AS path
        origin = as_path.split(" ")[-1]

        node = rtree.search_exact(prefix)
        if not node:
            # first time this prefix is seen: create its node and origin set
            node = rtree.add(prefix)
            node.data['origin'] = set()
        node.data['origin'].add(origin)

        elem = rec.get_next_elem()

load_finished = time.time()

sys.stderr.write("finished loading BGP data (in %s s)\n" % (load_finished - load_started))

# Answer every query read from STDIN with "<query>\t<origins>" on STDOUT.
# Exactly one output line is written per input line; '-' stands in for the
# origin when no covering prefix is found or the lookup fails.
for line in sys.stdin:
    # drop the line terminator; '\r' is stripped too so CRLF input still matches
    line = line.rstrip('\r\n')
    origin = '-'
    try:
        rnode = rtree.search_best( line )
        if rnode:
            # account for multi-origin; sorted so the output is reproducible
            origin = '|'.join( sorted( rnode.data['origin'] ) )
    except Exception:
        # Best effort: a query that cannot be looked up is reported as '-'.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit terminate the run.
        pass
    # sys.stdout.write works on both Python 2 and Python 3
    sys.stdout.write( "{line}\t{origin}\n".format( line=line, origin=origin ) )
	#!/usr/bin/env python

	'''
	Prototype tool for bulk IP->AS lookups for a specific date
	Downloads RIS data locally (using CAIDA BGPSTREAM)
	expects IP addresses/prefixes on STDIN (IPv4 and IPv6)
	takes a single command-line argument which is the DATE for which to download the table (any format convertible by arrow will do)
	example use:
	cat file_with_maaaaany_ips | ./bulk-ris-lookup.py 2009-03-22
	'''

	import arrow
	import time
	import sys
	from radix import Radix
	from _pybgpstream import BGPStream, BGPRecord, BGPElem
	# Objects shared by the rest of the script: the BGPStream that is configured
	# and started further down, and the record object it fills on every read.
	stream = BGPStream()
	rec = BGPRecord()

	# The snapshot date is the one required command-line argument.  Without it
	# there is nothing to download, so exit with a usage hint instead of an
	# IndexError traceback.
	if len(sys.argv) < 2:
	    sys.stderr.write("usage: %s DATE < file_with_ips\n" % sys.argv[0])
	    sys.exit(2)
	input_time = sys.argv[1]

	# arrow < 1.0 exposes .timestamp as a property, arrow >= 1.0 as a method;
	# accept both so that an arrow upgrade does not break the arithmetic below.
	timestamp = arrow.get( input_time ).timestamp
	if callable(timestamp):
	    timestamp = timestamp()
	# whole epoch seconds, whichever arrow flavour produced the value
	timestamp = int(timestamp)

	# fudge by 12 minutes to allow for the creation of RIB dumps: a requested time
	# that is less than 12 minutes in the past (or in the future) is moved back
	# to now - 12 minutes
	now = time.time()
	fudge_secs = 12*60
	if now - timestamp < fudge_secs:
	    old_timestamp = timestamp
	    # time.time() is a float; truncate so the timestamp stays an integer
	    timestamp = int(now - fudge_secs)
	    sys.stderr.write("timestamp to close to realtime, adjusted by %s\n" % ( timestamp - old_timestamp ))

	# Only the RIS project is queried, because of its 8-hourly dumps (rv is
	# different); only RIB records are requested.
	for filter_name, filter_value in (
	        ('project', 'ris'),
	        ('record-type', 'ribs')):
	    stream.add_filter(filter_name, filter_value)
	# for a quick test, additionally restrict the stream to a single collector:
	#stream.add_filter('collector','rrc11')

	# the interval covers the 8 hours leading up to the requested time
	window_end = timestamp
	window_start = window_end - 8*3600
	stream.add_interval_filter(window_start, window_end)

	# start the stream
	stream.start()
	sys.stderr.write("start loading BGP data\n")

	load_started = time.time()

	# Radix tree keyed by prefix; each node stores the set of origin ASes seen
	# for that prefix under node.data['origin'].
	rtree = Radix()

	# Nesting restored: every record is drained element by element, and each
	# element contributes its origin AS to the node of its prefix.
	while stream.get_next_record(rec):
	    elem = rec.get_next_elem()
	    while elem:
	        fields = elem.fields
	        as_path = fields['as-path']
	        prefix = fields['prefix']
	        # the origin AS is the last entry of the space-separated AS path
	        origin = as_path.split(" ")[-1]

	        node = rtree.search_exact(prefix)
	        if not node:
	            # first time this prefix is seen: create its node and origin set
	            node = rtree.add(prefix)
	            node.data['origin'] = set()
	        node.data['origin'].add(origin)

	        elem = rec.get_next_elem()

	load_finished = time.time()

	sys.stderr.write("finished loading BGP data (in %s s)\n" % (load_finished - load_started))

	# Answer every query read from STDIN with "<query>\t<origins>" on STDOUT.
	# Exactly one output line is written per input line; '-' stands in for the
	# origin when no covering prefix is found or the lookup fails.
	for line in sys.stdin:
	    # drop the line terminator; '\r' is stripped too so CRLF input still matches
	    line = line.rstrip('\r\n')
	    origin = '-'
	    try:
	        rnode = rtree.search_best( line )
	        if rnode:
	            # account for multi-origin; sorted so the output is reproducible.
	            # The separator is a plain '|' (no backslash).
	            origin = '|'.join( sorted( rnode.data['origin'] ) )
	    except Exception:
	        # Best effort: a query that cannot be looked up is reported as '-'.
	        # Exception (rather than a bare except) lets KeyboardInterrupt and
	        # SystemExit terminate the run.
	        pass
	    # sys.stdout.write works on both Python 2 and Python 3
	    sys.stdout.write( "{line}\t{origin}\n".format( line=line, origin=origin ) )