Skip to content

Instantly share code, notes, and snippets.

@mgeeky
Created March 22, 2016 17:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgeeky/bf0e367098047330b58b to your computer and use it in GitHub Desktop.
Save mgeeky/bf0e367098047330b58b to your computer and use it in GitHub Desktop.
RFC files gathering and renaming utility.
#!/usr/bin/python
import httplib
from sys import exit
from string import strip, replace
import os
from os.path import normpath
import re
g_MAX_RFC_NUMBER = 5200
g_NOT_FOUND_ERROR = "404: Page Not Found"

# "[Page N]" marker that closes the footer line of every page of an RFC text.
# Searched (not matched from the line start) because footers begin with
# author names that contain commas and dots.
_PAGE_FOOTER_RE = re.compile(r'\[Page\s+([0-9]{1,5})\]\s*$')

# Characters that are not allowed in a file name on Windows ('/' is not
# allowed anywhere), plus ASCII control characters.
_UNSAFE_NAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def extract_title(lines):
    """Return the document title found in the lines of an RFC text.

    Heuristic: the first run of non-blank lines is taken to be the header
    block (working group, authors, date).  The next run of non-blank lines
    after it is taken to be the title; a title wrapped over several lines
    is joined with single spaces.

    lines -- sequence of text lines, with or without line terminators.
    Returns '' when no title can be located.
    """
    in_header = False
    header_done = False
    title_parts = []
    for line in lines:
        blank = not line.strip()
        if not header_done:
            if not in_header:
                # Still skipping the blank lines that precede the header.
                in_header = not blank
            elif blank:
                header_done = True
            continue
        if blank:
            if title_parts:
                break  # blank line after the title: title is complete
            continue
        title_parts.append(line.strip())
    return " ".join(title_parts)


def count_pages(lines):
    """Return the page number of the last "[Page N]" footer, or 0 if none.

    lines -- sequence (not a one-shot iterator) of text lines.
    """
    for line in reversed(lines):
        found = _PAGE_FOOTER_RE.search(line)
        if found is not None:
            return int(found.group(1))
    return 0


def build_filename(number, title, pages):
    """Build the descriptive file name 'RFC<n> - <title> (pgs <p>).txt'.

    Characters that are invalid in file names are dropped from the title
    and runs of whitespace are collapsed to a single space.
    """
    safe_title = _UNSAFE_NAME_CHARS_RE.sub('', title)
    safe_title = " ".join(safe_title.split())
    return "RFC%d - %s (pgs %d).txt" % (number, safe_title, pages)


if __name__ == '__main__':
    # A parenthesised single argument prints the same whether `print` is a
    # statement or a function, so the syntax below is version-neutral.
    print("Simple script that downloads every RFC document, "
          "names it and\n stores on your local machin"
          "e (in dir: 'RFC')")

    # Create the output directory only when missing, so that a genuine
    # failure (e.g. permissions) is reported instead of being swallowed.
    if not os.path.isdir('RFC'):
        os.mkdir('RFC')
    os.chdir('RFC')

    try:
        h = httplib.HTTPConnection('www.ietf.org')
    except Exception:
        print('[!] Error during HTTPConnection(\'www.ietf.org\')')
        exit(1)  # non-zero: this is a failure path

    # NOTE(review): the server may redirect plain HTTP to HTTPS; if every
    # request reports an unexpected status, switch to HTTPSConnection.

    # RFC numbers start at 1 and g_MAX_RFC_NUMBER itself is included.
    for i in range(1, g_MAX_RFC_NUMBER + 1):
        print("Processing RFC%d..." % i)
        try:
            h.request("GET", "/rfc/rfc%d.txt" % i)
            resp = h.getresponse()
            data = resp.read()
        except (httplib.HTTPException, IOError):
            print("\tError. Omitting this RFC.")
            # Reset the persistent connection; it is reopened automatically
            # by the next request.  Without this every later request fails.
            h.close()
            continue

        if data.find("was never issued.") != -1:
            # Show the server's explanation (the body was already read).
            print("\t" + data.strip())
            continue
        if resp.status == 404 or data.find(g_NOT_FOUND_ERROR) != -1:
            print("\tThere is no any RFC%d." % i)
            continue
        if resp.status != 200:
            print("\tUnexpected HTTP status %d. Omitting this RFC."
                  % resp.status)
            continue

        n = 'rfc%d.txt' % i
        # Binary mode stores the document exactly as received; `with`
        # guarantees the handle is closed before the rename below.
        with open(n, 'wb') as f:
            f.write(data)

        # Parse the in-memory copy instead of re-reading the file.
        lines = data.splitlines()
        title = extract_title(lines)
        pages = count_pages(lines)
        if not title:
            continue  # nothing to name the file after; keep 'rfcN.txt'

        print('\tPages %d.' % pages)
        newname = build_filename(i, title, pages)
        print('New name: "%s"' % newname)
        try:
            # Relative names suffice: the working directory is 'RFC'.
            os.rename(n, newname)
        except OSError:
            print("\tCould not rename '%s'; keeping the original name." % n)

    h.close()
    raw_input("Press ENTER to exit.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment