# Extract Ole2Link from documents
#!/usr/bin/env python2
import re
import sys
import zipfile
from io import BytesIO

from lxml.etree import XML
def url_from_elem(e):
    """Return the relationship target stored in the attributes of *e*.

    The ``Target`` attribute is tried first, then the lowercase ``target``
    spelling.

    :param e: element-like object exposing an ``attrib`` mapping.
    :returns: the first non-empty value found; otherwise the result of the
        last lookup (``None`` when the attribute is absent).
    """
    value = None
    for name in ('Target', 'target'):
        # The lookup previously went through an undefined name ``x`` and
        # raised NameError on every call; read from the parameter instead.
        value = e.attrib.get(name)
        if value:
            break
    return value
def look_for_rel(e):
    """Yield the target of every relationship element found below *e*.

    Each child whose tag ends with ``relationship`` (compared
    case-insensitively, so a namespace prefix is tolerated) contributes the
    value of every attribute named ``target`` in any letter case.  All other
    children are searched recursively.

    :param e: an element whose iteration yields its direct children and whose
        children expose ``tag``, ``keys()`` and ``attrib`` (lxml and the
        standard-library ElementTree both qualify).
    :returns: a generator of attribute values, in document order.
    """
    for child in e:
        tag = child.tag
        if not hasattr(tag, 'lower'):
            # lxml reports comments and processing instructions with a
            # non-string tag; they carry no relationships, so skip them
            # instead of failing on ``.lower()``.
            continue
        if tag.lower().endswith('relationship'):
            for key in child.keys():
                if key.lower() == 'target':
                    yield child.attrib[key]
        else:
            # A bare recursive call only builds a generator and throws it
            # away, so its results must be re-yielded.  An explicit loop is
            # used rather than ``yield from`` to stay valid on Python 2.
            for target in look_for_rel(child):
                yield target
def fix_zip(data):
    """Pad a truncated ZIP so its end-of-central-directory record is complete.

    The last ``PK\\x05\\x06`` signature in *data* is located and, if fewer
    than 22 bytes (the fixed-size part of that record) are available from
    that offset, the buffer is right-padded with NUL bytes up to that length.
    A message with the number of missing bytes is printed when padding is
    applied.

    :param data: raw archive contents as a byte string.
    :returns: *data* unchanged when no signature is present or the record is
        already complete, otherwise the padded byte string.
    """
    eocd_signature = b"PK\x05\x06"
    eocd_size = 22

    # The record sits at the end of the archive, so search backwards; the
    # signature bytes may also occur earlier inside member data.
    idx = data.rfind(eocd_signature)
    if idx < 0:
        # No record to repair.  Treating -1 as an offset would wrongly pad
        # any short non-ZIP input.
        return data

    missing = idx + eocd_size - len(data)
    if missing > 0:
        print('[!] broken zip - missing {0:d} bytes'.format(missing))
        data = data.ljust(idx + eocd_size, b"\x00")
    return data
# Read the document as raw bytes: the input is a ZIP container, and text mode
# may translate line endings (or attempt to decode) and corrupt it.
with open(sys.argv[1], 'rb') as f:
    data = fix_zip(f.read())

z = zipfile.ZipFile(BytesIO(data))

# Matches a single %XX URL escape; compiled once rather than per link.
pct_escape = re.compile('%([0-9a-fA-F]{2})')
# (link, part name) pairs collected for the summary printed at the end.
decoded = []

for fi in z.filelist:
    # Only ".rels" parts stored below a "_rels" folder are inspected.
    if '/_rels/' in fi.filename and fi.filename.endswith('.rels'):
        xml = XML(z.read(fi.filename))
        for u in look_for_rel(xml):
            # Undo %XX escapes; n is the number of escapes replaced.
            # chr(int(.., 16)) gives the same one-character result as the
            # Python 2-only 'hex' codec it replaces.
            u, n = pct_escape.subn(lambda g: chr(int(g.group(1), 16)), u)
            lowered = u.lower()
            if lowered.startswith('http'):
                try:
                    print('[+] HTTP-Ole2Link in {0} in file {1}'.format(u, fi.filename).encode('utf-8'))
                except UnicodeError:
                    # Formatting/encoding a non-ASCII link can fail; fall
                    # back to an escaped representation of the link alone.
                    print(repr(u))
            elif lowered.startswith('script'):
                print('[+] Script-Ole2Link in {0} in file {1}'.format(u, fi.filename))
            # NOTE(review): links with exactly one escape are not recorded;
            # confirm that "> 1" (rather than "> 0") is intended.
            if n > 1:
                decoded.append((u, fi.filename))

for u, part_name in decoded:
    print('URLEncoded link {0} in {1}'.format(u, part_name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment