Skip to content

Instantly share code, notes, and snippets.

@mgeeky
Created March 22, 2016 17:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgeeky/15d0e53c133c6e1ec08f to your computer and use it in GitHub Desktop.
Save mgeeky/15d0e53c133c6e1ec08f to your computer and use it in GitHub Desktop.
Phrack e-zine renaming utility. Given a directory of downloaded Phrack philes, it crawls phrack.org, collects the article titles and authors, and applies them to the file names.
#!/usr/bin/python
# This script traverses a directory of PHRACK e-zines and, for each
# issue, downloads the corresponding table of contents from phrack.org.
# The contents are used to rename every file (phile) in the issue
# directory after the article title listed on the web page.
#
# MGeeky, 2012
import os
import urllib
import string
from sys import exit
import re
import HTMLParser
# G L O B A L S
# directory with phrack ezines
g_PhrackDir = r"d:\ebooks\SECURITY\MAGAZINES\PHRACK"
# File name format. The following tokens are supplied when formatting:
# %(issue)d, %(phile)d, %(author)s, %(name)s.
g_NameFmt = "p%(issue)02d-0x%(phile)02x - %(author)s - %(name)s.txt"
###################################
def RenFiles(dir):
    """Rename the philes of one Phrack issue directory after their titles.

    The issue number is parsed from *dir* (the last ``phrack<N>`` in the
    path), the issue's table of contents is downloaded once, and every
    ``<N>.txt`` file found while walking *dir* is renamed according to
    ``g_NameFmt``.

    dir -- path of the issue directory; expected to contain ``phrack<N>``.
           (The name shadows the builtin, but is kept so that existing
           callers are unaffected.)

    Returns None. An unparsable directory name, a failed download, an
    unparsable contents page, an unknown phile or a failed rename are
    reported on stdout and skipped. An empty download terminates the
    script with exit status 1.
    """
    # A directory without "phrack<N>" in its path yields no match at all,
    # so the match object must be tested before touching its groups.
    found = re.match(r".*phrack(\d{1,2})", dir)
    issue = int(found.group(1)) if found else 0
    if issue <= 0:
        print("\t[!] Error while parsing dir name: %s" % dir)
        return

    # Fetch the list of philes once per issue, not once per walked
    # directory. urlopen signals failure by raising, never by returning a
    # false value, and the handle is closed whatever read() does.
    url = "http://phrack.org/issues/%d/1.html" % issue
    try:
        u = urllib.urlopen(url)
        try:
            page = u.read()
        finally:
            u.close()
    except IOError:
        print("\t[!] Cannot download '%s' resource" % url)
        return

    # Check the payload before handing it to the parser.
    if not page:
        print("\t[!] Cannot download %d issue!" % issue)
        exit(1)

    # parsePage raises AssertionError when the page lists no articles.
    try:
        (names, authors) = parsePage(page)
    except AssertionError:
        print("\t[!] Cannot parse contents of #%d issue" % issue)
        return

    h = HTMLParser.HTMLParser()
    for root, dirs, files in os.walk(dir):
        print("\n[>] Renaming #%02d issue philes. There are %d of them."
              % (issue, len(files)))

        for f in files:
            if ".tar.gz" in f:
                continue

            m = re.match(r"(\d{1,2})\.txt", f, re.I)
            if not m:
                # Not named like "<N>.txt" (e.g. already renamed).
                print("\t[!] Skipping '%s': not a phile of #%d issue"
                      % (f, issue))
                continue

            # Zero-based index into the contents table.
            # NOTE(review): the same zero-based value goes into the file
            # name (1.txt -> 0x00); confirm this numbering is intended.
            phile = int(m.group(1)) - 1
            if not 0 <= phile < len(names):
                # Guards against "0.txt" silently wrapping to the last
                # entry and against numbers past the end of the contents.
                print("\t[!] Cannot find phile: %d of #%d issue"
                      % (phile + 1, issue))
                continue

            name = names[phile]
            author = authors[phile]
            if not name:
                continue

            new = g_NameFmt % {"issue": issue, "phile": phile,
                               "author": author, "name": name}

            # Decode HTML entities, then replace every character that is
            # not allowed in file names.
            new = h.unescape(new)
            for ch in '<>:"/\\|?*':
                new = new.replace(ch, '-')

            # Compare only the final name, so a file that already carries
            # it is not renamed onto itself.
            if f == new:
                continue

            newf = os.path.join(root, new)
            oldf = os.path.join(root, f)
            try:
                os.rename(oldf, newf)
            except (OSError, UnicodeError):
                print("\t[!] Couldn't rename file: '%s' !" % newf)
            else:
                print("\t %s -> %s" % (f, new))
###################################
def parsePage(page):
    """Extract article titles and authors from a Phrack issue page.

    page -- HTML text of the issue page containing the contents table.

    Returns a tuple ``(names, authors)`` of two equally long lists, in
    the order the rows appear in *page*.

    Raises AssertionError when no contents row is found.
    """
    # Non-greedy groups keep each match inside a single table row even
    # when several rows share one physical line; greedy ".+" would swallow
    # everything up to the last row on that line.
    rex = (r'<tr><td align="left"><a href=".+?#article">(.+?)<.a><.td>'
           r'<td align="right">(.+?)<.td><.tr>')

    names = []
    authors = []
    for m in re.finditer(rex, page, re.I):
        names.append(m.group(1))
        authors.append(m.group(2))

    # Raised explicitly (same exception type as the former assert) so the
    # check still runs when Python is started with -O.
    if not names:
        raise AssertionError("no article rows found in the issue page")
    return (names, authors)
###################################
# Script entry point: run RenFiles on every issue directory located
# directly under g_PhrackDir.
if __name__ == '__main__':
    print("\n[+] PHRACK magazine files namer")

    for root, dirs, files in os.walk(g_PhrackDir):
        for d in dirs:
            RenFiles(os.path.join(root, d))
        # Only the top level holds issue directories. RenFiles walks each
        # of them recursively itself, so descending here would hand it
        # nested directories a second time.
        break

    print("\nEnd of script.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment