dhondta/README.md

## README.md

      
    Raw
  

              README.md
            
          
    WLF

This Tinyscript-based tool filters a wordlist read from STDIN and writes the result to STDOUT or to an output file. This is particularly useful for preparing dictionary attacks.
This can be installed using:
$ pip install tinyscript
$ tsm install wlf

This tool is especially useful in the following use cases.
Simple wordlist filtering

Filtering RockYou for all passwords with characters 1 to 5, any lowercase or uppercase letter or the special characters !-_@:
$ cat rockyou.txt | wlf -a "[1-5]aA[!-_@]" -s > rockyou_filtered.txt

Dictionary attack with a wordlist from a memory image on-the-fly

Cracking a ZIP file using Fcrackzip from the strings of a forensics image:
$ strings memory.raw | wlf | fcrackzip -u -D -p /dev/stdin archive.zip


PASSWORD FOUND!!!!: pw == Cyb3rs3curit3


Cracking a ZIP file using John the Ripper from the strings of a forensics image:
$ zip2john archive.zip > archive.zip.hash
$ strings memory.raw | wlf -a "aA1!" | john --stdin archive.zip.hash
[...]


## word-list-filter.py
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
from tinyscript import *


# Module-level metadata of the tool.
# NOTE(review): presumably collected by Tinyscript's initialize() to build the
#  help/usage message - confirm against the Tinyscript documentation.
__author__ = "Alexandre D'Hondt"
__version__ = "1.3"
__copyright__ = "A. D'Hondt"
__license__ = "agpl-3.0"
__doc__ = """
This simple tool allows to filter a wordlist from STDIN to STDOUT or an output
 file. This is particularly interesting for dictionary attacks.

"""
# Sample argument strings (without the program name) showing typical usages:
#  file to file, STDIN to STDOUT, and feeding a cracking tool through a pipe.
__examples__ = [
    "-r passwords.lst -w passwords-filtered.lst",
    "-d < passwords.lst > passwords-filtered.lst",
    "-d < passwords.lst | fcrackzip -u -D -p /dev/stdin archive.zip",
    "-a \"aA1!\" -r passwords.lst | john my.hash --stdin",
]

# NOTE(review): looks like a Tinyscript setting controlling how the script name
#  is displayed (the README titles the tool "WLF") - confirm.
SCRIPTNAME_FORMAT = "acronym"


def get_alphabet(alphabet):
    r""" Transforms the input alphabet to a full character set.

    The alphabet is split into tokens, each being either a bracketed group or
     a single character, and every token is expanded in turn:
    - *, a, A, 1, !: printables, lowercase letters, uppercase letters, digits
                      and punctuation characters (from the string module)
    - [x-y]        : interval between two uppercase letters, two lowercase
                      letters or two digits, ascending or descending
    - [...]        : any other bracketed group is a list of literal characters

    Characters are appended in the order they are met and a character that was
     already collected is never added a second time.

    :param alphabet: alphabet specification (e.g. "aA1" or "[1-5]a[!-_@]")
    :return:         expanded character set, as a string
    :raises ValueError: when a character outside brackets is not one of the
                         special characters listed above

    >>> get_alphabet("Aa1")
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    >>> get_alphabet("1[f-a]")
    '0123456789fedcba'
    >>> get_alphabet("[1-4][f-a]")
    '1234fedcba'
    >>> get_alphabet("[!-_@]")
    '!-_@'
    >>> get_alphabet("[1-4][f-a]!")
    '1234fedcba!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    >>> get_alphabet("[1-4][f-a]!*")
    '1234fedcba!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~056789ghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \t\n\r\x0b\x0c'
    """
    # special characters and the character class each one stands for
    classes = {
        '*': string.printable,
        'a': string.ascii_lowercase,
        'A': string.ascii_uppercase,
        '1': string.digits,
        '!': string.punctuation,
    }
    s = ""
    for c in re.findall(r"(\[.+?\]|.)", alphabet):
        a = classes.get(c)
        # handle a bracketed group
        if c.startswith("[") and c.endswith("]"):
            # "[x-y]" is an interval only if both bounds are of the same kind
            if len(c) == 5 and c[2] == "-":
                c1, c2 = c[1], c[3]
                if c1.isupper() and c2.isupper():
                    a = string.ascii_uppercase
                elif c1.islower() and c2.islower():
                    a = string.ascii_lowercase
                elif c1.isdigit() and c2.isdigit():
                    a = string.digits
                if a is not None:
                    start, end = a.index(c1), a.index(c2)
                    # walk backwards when the bounds are given in reverse
                    step = 1 if start < end else -1
                    for k in range(start, end + step, step):
                        i = a[k]
                        if i not in s:
                            s += i
                    continue
            # otherwise, the group holds literal ("absolute") characters ;
            #  add those that were not collected yet
            for i in c[1:-1]:
                if i not in s:
                    s += i
        # or handle a single special character
        elif a is not None:
            for i in a:
                if i not in s:
                    s += i
        else:
            raise ValueError("Bad character '{}'".format(c))
    return s


def filter_word(wordlist, length, alphabet):
    """ Yields the words of a wordlist that match the length and alphabet
     constraints.

    A word is kept when its length lies within length[0] and length[1] (both
     included) and when each of its items belongs to the alphabet.

    :param wordlist: path of the wordlist to read in binary mode, or None to
                      read the lines provided by ts.stdin_pipe()
    :param length:   indexable holding the minimum and maximum word lengths
    :param alphabet: container of the allowed characters
    :return:         generator of the accepted words ; lines from STDIN are
                      right-stripped while lines from a file are stripped on
                      both sides
    """
    def _accepted(word):
        if not length[0] <= len(word) <= length[1]:
            return False
        return all(item in alphabet for item in word)

    # no input file: consume STDIN
    if wordlist is None:
        for raw in ts.stdin_pipe():
            word = raw.rstrip()
            if _accepted(word):
                yield word
        return
    # otherwise, read the given file as bytes
    with open(wordlist, 'rb') as handle:
        for raw in handle:
            word = raw.strip()
            if _accepted(word):
                yield word


if __name__ == '__main__':
    # NOTE: parser, initialize(), args, ts, b() and WINDOWS are not defined in
    #  this file ; they are expected to come from "from tinyscript import *"
    parser.add_argument("-a", "--alphabet", default="aA1",
                        help="character set to filter",
                        note="characters order matters !\n"
                             "Use [...] to enclose absolute characters\n"
                             "Special characters:\n- *: printables\n"
                             "- a: lowercase letters\n- A: uppercase letters\n"
                             "- 1: digits\n- !: punctuation characters\n")
    # store_false: args.duplicates is True unless -d is given, and the filter
    #  on duplicates below is applied when it is False
    parser.add_argument("-d", "--duplicates", action="store_false",
                        help="filter duplicates", note="very slow")
    parser.add_argument("-l", "--length", default="6-16",
                        type=ts.str_matches(r"^\d+\-\d+$"),
                        help="word length interval")
    parser.add_argument("-r", "--read", type=ts.file_exists,
                        help="read from file")
    parser.add_argument("-s", "--sort", action="store_true",
                        help="sort output file",
                        note="only works with -w ; relies on 'sort'")
    parser.add_argument("-w", "--write", type=ts.file_does_not_exist,
                        help="write to file")
    initialize()
    # turn "MIN-MAX" into [MIN, MAX] ; a lone number is taken as the maximum
    l = list(map(int, args.length.split("-")))
    if len(l) == 1:
        l.insert(0, 0)
    args.length = l
    # when the output file gets sorted, "sort -u" already drops duplicates, so
    #  the slow in-memory filter is disabled
    args.duplicates = True if args.write and args.sort else args.duplicates
    # bounded memory of the words already seen ; the oldest ones are dropped
    #  once the maximum number of entries is reached
    words = collections.deque(maxlen=256*1024*1024//max(args.length[1], 1))
    try:
        with open(args.write, 'wb') if args.write else sys.stdout as out:
            for w in filter_word(args.read, args.length,
                                 b(get_alphabet(args.alphabet))):
                if not args.duplicates:
                    if w in words:
                        continue
                    words.append(w)
                w = w + b("\n")
                # the output file is opened in binary mode while STDOUT
                #  expects text
                if args.write is None:
                    w = w.decode()
                out.write(w)
    except BrokenPipeError:
        # e.g. the process reading our output closed the pipe early
        pass
    if args.write and args.sort and not WINDOWS:
        # the temporary file is created next to the output file so that the
        #  final replacement never crosses a filesystem boundary
        tmp_fn = os.path.join(os.path.dirname(os.path.abspath(args.write)),
                              hashlib.md5(args.write.encode()).hexdigest())
        try:
            # let 'sort' write straight into the temporary file and wait for
            #  it, so that its exit status can be checked
            with open(tmp_fn, 'wb') as tmp:
                status = subprocess.call(["sort", "-u", args.write],
                                         stdout=tmp)
            if status == 0:
                # atomic swap ; the unsorted file is only replaced once the
                #  sorted one is complete
                os.replace(tmp_fn, args.write)
            else:
                sys.stderr.write("'sort' failed with exit code {} ; {} is "
                                 "left unsorted\n".format(status, args.write))
        finally:
            # never leave the temporary file behind
            if os.path.exists(tmp_fn):
                os.remove(tmp_fn)
	#!/usr/bin/python3
	# -- coding: UTF-8 --
	from tinyscript import *


	# Module-level metadata of the tool (repaired from a markdown-garbled copy:
	#  pipes were escaped as "\|" and tabs had leaked into the description).
	__author__ = "Alexandre D'Hondt"
	__version__ = "1.3"
	__copyright__ = "A. D'Hondt"
	__license__ = "agpl-3.0"
	__doc__ = ("\nThis simple tool allows to filter a wordlist from STDIN to "
	           "STDOUT or an output\n file. This is particularly interesting "
	           "for dictionary attacks.\n\n")
	# Sample argument strings (without the program name) showing typical usages
	__examples__ = [
	    "-r passwords.lst -w passwords-filtered.lst",
	    "-d < passwords.lst > passwords-filtered.lst",
	    "-d < passwords.lst | fcrackzip -u -D -p /dev/stdin archive.zip",
	    "-a \"aA1!\" -r passwords.lst | john my.hash --stdin",
	]

	SCRIPTNAME_FORMAT = "acronym"


	def get_alphabet(alphabet):
	    r""" Transforms the input alphabet to a full character set.

	    The alphabet is split into tokens, each being either a bracketed group
	     or a single character, and every token is expanded in turn:
	    - *, a, A, 1, !: printables, lowercase letters, uppercase letters,
	                      digits and punctuation characters (string module)
	    - [x-y]        : interval between two uppercase letters, two lowercase
	                      letters or two digits, ascending or descending
	    - [...]        : any other bracketed group is a list of literal
	                      characters

	    Characters are appended in the order they are met and a character that
	     was already collected is never added a second time.

	    :param alphabet: alphabet specification (e.g. "aA1" or "[1-5]a[!-_@]")
	    :return:         expanded character set, as a string
	    :raises ValueError: when a character outside brackets is not one of the
	                         special characters listed above

	    >>> get_alphabet("Aa1")
	    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
	    >>> get_alphabet("1[f-a]")
	    '0123456789fedcba'
	    >>> get_alphabet("[1-4][f-a]")
	    '1234fedcba'
	    >>> get_alphabet("[!-_@]")
	    '!-_@'
	    >>> get_alphabet("[1-4][f-a]!")
	    '1234fedcba!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
	    >>> get_alphabet("[1-4][f-a]!*")
	    '1234fedcba!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~056789ghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \t\n\r\x0b\x0c'
	    """
	    # special characters and the character class each one stands for
	    classes = {
	        '*': string.printable,
	        'a': string.ascii_lowercase,
	        'A': string.ascii_uppercase,
	        '1': string.digits,
	        '!': string.punctuation,
	    }
	    s = ""
	    for c in re.findall(r"(\[.+?\]|.)", alphabet):
	        a = classes.get(c)
	        # handle a bracketed group
	        if c.startswith("[") and c.endswith("]"):
	            # "[x-y]" is an interval only if both bounds are of the same kind
	            if len(c) == 5 and c[2] == "-":
	                c1, c2 = c[1], c[3]
	                if c1.isupper() and c2.isupper():
	                    a = string.ascii_uppercase
	                elif c1.islower() and c2.islower():
	                    a = string.ascii_lowercase
	                elif c1.isdigit() and c2.isdigit():
	                    a = string.digits
	                if a is not None:
	                    start, end = a.index(c1), a.index(c2)
	                    # walk backwards when the bounds are given in reverse
	                    step = 1 if start < end else -1
	                    for k in range(start, end + step, step):
	                        i = a[k]
	                        if i not in s:
	                            s += i
	                    continue
	            # otherwise, the group holds literal ("absolute") characters ;
	            #  add those that were not collected yet
	            for i in c[1:-1]:
	                if i not in s:
	                    s += i
	        # or handle a single special character
	        elif a is not None:
	            for i in a:
	                if i not in s:
	                    s += i
	        else:
	            raise ValueError("Bad character '{}'".format(c))
	    return s


	def filter_word(wordlist, length, alphabet):
	    """ Yields the words of a wordlist that match the length and alphabet
	     constraints.

	    A word is kept when its length lies within length[0] and length[1]
	     (both included) and when each of its items belongs to the alphabet.

	    :param wordlist: path of the wordlist to read in binary mode, or None
	                      to read the lines provided by ts.stdin_pipe()
	    :param length:   indexable holding the minimum and maximum word lengths
	    :param alphabet: container of the allowed characters
	    :return:         generator of the accepted words ; lines from STDIN are
	                      right-stripped while lines from a file are stripped
	                      on both sides
	    """
	    check = lambda l: length[0] <= len(l) <= length[1] and \
	                      all(c in alphabet for c in l)
	    if wordlist is None:
	        for l in ts.stdin_pipe():
	            l = l.rstrip()
	            if check(l):
	                yield l
	    else:
	        with open(wordlist, 'rb') as f:
	            for l in f:
	                l = l.strip()
	                if check(l):
	                    yield l


	if __name__ == '__main__':
	    # NOTE: parser, initialize(), args, ts, b() and WINDOWS are not defined
	    #  in this file ; they are expected to come from
	    #  "from tinyscript import *"
	    parser.add_argument("-a", "--alphabet", default="aA1",
	                        help="character set to filter",
	                        note="characters order matters !\n"
	                             "Use [...] to enclose absolute characters\n"
	                             "Special characters:\n- *: printables\n"
	                             "- a: lowercase letters\n- A: uppercase letters\n"
	                             "- 1: digits\n- !: punctuation characters\n")
	    # store_false: args.duplicates is True unless -d is given, and the
	    #  filter on duplicates below is applied when it is False
	    parser.add_argument("-d", "--duplicates", action="store_false",
	                        help="filter duplicates", note="very slow")
	    parser.add_argument("-l", "--length", default="6-16",
	                        type=ts.str_matches(r"^\d+\-\d+$"),
	                        help="word length interval")
	    parser.add_argument("-r", "--read", type=ts.file_exists,
	                        help="read from file")
	    parser.add_argument("-s", "--sort", action="store_true",
	                        help="sort output file",
	                        note="only works with -w ; relies on 'sort'")
	    parser.add_argument("-w", "--write", type=ts.file_does_not_exist,
	                        help="write to file")
	    initialize()
	    # turn "MIN-MAX" into [MIN, MAX] ; a lone number is taken as the maximum
	    l = list(map(int, args.length.split("-")))
	    if len(l) == 1:
	        l.insert(0, 0)
	    args.length = l
	    # when the output file gets sorted, "sort -u" already drops duplicates,
	    #  so the slow in-memory filter is disabled
	    args.duplicates = True if args.write and args.sort else args.duplicates
	    # bounded memory of the words already seen ; the oldest ones are dropped
	    #  once the maximum number of entries is reached
	    words = collections.deque(maxlen=256*1024*1024//max(args.length[1], 1))
	    try:
	        with open(args.write, 'wb') if args.write else sys.stdout as out:
	            for w in filter_word(args.read, args.length,
	                                 b(get_alphabet(args.alphabet))):
	                if not args.duplicates:
	                    if w in words:
	                        continue
	                    words.append(w)
	                w = w + b("\n")
	                # the output file is opened in binary mode while STDOUT
	                #  expects text
	                if args.write is None:
	                    w = w.decode()
	                out.write(w)
	    except BrokenPipeError:
	        # e.g. the process reading our output closed the pipe early
	        pass
	    if args.write and args.sort and not WINDOWS:
	        # the temporary file is created next to the output file so that the
	        #  final replacement never crosses a filesystem boundary
	        tmp_fn = os.path.join(os.path.dirname(os.path.abspath(args.write)),
	                              hashlib.md5(args.write.encode()).hexdigest())
	        try:
	            # let 'sort' write straight into the temporary file and wait
	            #  for it, so that its exit status can be checked
	            with open(tmp_fn, 'wb') as tmp:
	                status = subprocess.call(["sort", "-u", args.write],
	                                         stdout=tmp)
	            if status == 0:
	                # atomic swap ; the unsorted file is only replaced once the
	                #  sorted one is complete
	                os.replace(tmp_fn, args.write)
	            else:
	                sys.stderr.write("'sort' failed with exit code {} ; {} is "
	                                 "left unsorted\n".format(status, args.write))
	        finally:
	            # never leave the temporary file behind
	            if os.path.exists(tmp_fn):
	                os.remove(tmp_fn)