Skip to content

Instantly share code, notes, and snippets.

@halflings
Created September 3, 2013 12:08
Show Gist options
  • Save halflings/6423003 to your computer and use it in GitHub Desktop.
Save halflings/6423003 to your computer and use it in GitHub Desktop.
A small script that extracts the URLs contained in a .swf file. It does not work with LZMA-compressed .swf files.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import string
import sys
import zlib
def log(text):
    """Write *text* to standard output, followed by a newline.

    The call form ``print(text)`` is used (rather than the Python 2-only
    ``print text`` statement) so the helper behaves the same on Python 2
    and Python 3.
    """
    print(text)
def decompress_swf(filename):
    """Read a SWF file and return its content in uncompressed form.

    The first 8 bytes of the file are treated as the header: a 3-byte
    signature, one version byte, and four further bytes that are copied
    through unchanged. The header is logged, then the remainder of the
    file is handled according to the signature:

    * ``CWS`` -- the body is zlib-compressed and gets decompressed.
    * ``FWS`` -- the body is used as is.
    * ``ZWS`` -- LZMA-compressed; not supported.

    Args:
        filename: Path of the .swf file to read.

    Returns:
        A byte string (``str`` on Python 2, ``bytes`` on Python 3) made of
        ``FWS``, the original header bytes 3 to 7, and the uncompressed
        body.

    Raises:
        Exception: If the file is LZMA-compressed (``ZWS``) or if the
            signature is not one of the three known values (this includes
            empty or truncated files).
        zlib.error: If a ``CWS`` body is not valid zlib data.
    """
    with open(filename, 'rb') as fh:
        data = fh.read()

    # Work on byte strings throughout so the code behaves identically on
    # Python 2 (where b'...' is str) and Python 3.
    header = data[:8]
    signature = header[:3]
    body = data[8:]

    log('SWF HEADER:')
    # The header contains binary bytes; log its repr so the output is
    # readable and the same on every Python version.
    log(repr(header))
    log('-' * 80)

    if signature == b'CWS':
        uncompressed = zlib.decompress(body)
    elif signature == b'ZWS':
        # uncompressed = lzma.decompress(body)
        # This doesn't work for some obscure reason...
        raise Exception("LZMA compression not supported")
    elif signature == b'FWS':
        uncompressed = body
    else:
        # Report the offending header, not the whole file body.
        raise Exception("Unknown SWF header: {}".format(repr(header)))

    # Slicing (data[3:8]) instead of indexing keeps the version byte a byte
    # string on Python 3 and cannot raise IndexError.
    return b'FWS' + data[3:8] + uncompressed
# Pattern matching absolute http/https/ftp URLs. It contains only
# non-capturing groups, so re.findall() returns whole matches.
# Written as a raw string: the pattern value is unchanged, but sequences such
# as "\/" and "\w" are no longer (invalid) string escapes.
URL_REGEX = r"(?:http|ftp|https):\/\/[\w\-_]+(?:\.[\w\-_]+)+(?:[\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?"
if __name__ == '__main__':
    # Script entry point: print every absolute URL found in the SWF file
    # given as the first command-line argument.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} FILE.swf'.format(sys.argv[0]))

    swf_data = decompress_swf(sys.argv[1])

    # Replacing unprintable characters by ' ' to only extract relevant data.
    # The previous test (`char if char else ' '`) was true for every
    # character, so nothing was ever replaced. bytearray() yields integers
    # on both Python 2 and Python 3, and join() avoids quadratic `+=`.
    printable = frozenset(bytearray(string.printable.encode('ascii')))
    readable_data = ''.join(
        chr(byte) if byte in printable else ' '
        for byte in bytearray(swf_data)
    )

    # Other filters are usually too permissive, but absolute links still work great
    # on_press_links = set(re.findall('onPress"(\S+)', readable_data))
    absolute_links = set(re.findall('({})'.format(URL_REGEX), readable_data))
    # mails = re.findall('mailto:(\S+@)', readable_data)
    # javascript = re.findall('javascript:(\S+)', readable_data)
    # files = re.findall('\S+\.(?:xml|html|js)', readable_data)

    log('ABSOLUTE LINKS')
    # Sorted so that the output is deterministic from run to run.
    for link in sorted(absolute_links):
        log(link)
    log('-' * 80)
@vishal4python
Copy link

freesis_kofia

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment