Last active
November 2, 2021 15:54
-
-
Save prabhu/3effaab37e6bdcac6d37b7cc89b8be1c to your computer and use it in GitHub Desktop.
Python: Look for Bi-directional unicode characters in a file or directory. CVE-2021-42574 / https://trojansource.codes/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Usage: python3 bidi-check.py <file or directory>
# This script looks for bi-directional Unicode characters. Useful for detecting CVE-2021-42574 / https://trojansource.codes/
import argparse | |
import codecs | |
import os | |
import sys | |
import unicodedata | |
# Maps a code point (int) to a metadata dict for every character the scanner
# flags. Filled in by build_dataset().
bidi_dict = {}


def build_dataset():
    """Populate ``bidi_dict`` with the characters the scanner should flag.

    Walks the code points from 255 up to (but not including) 0x10000 and
    records a character when its Unicode name contains "bracket", "paren"
    or "override" (case-insensitively), or when its general category is
    ``Ps``, ``Pe`` or ``Cf``.

    Each entry is keyed by the integer code point and stores the general
    category, the Unicode name, the hex form of the code point, the
    character itself and its bidirectional class. The size of the dataset
    is printed once the walk is complete.
    """
    suspicious_words = ("bracket", "paren", "override")
    flagged_categories = ("Ps", "Pe", "Cf")

    for codepoint in range(255, 0x10000):
        symbol = chr(codepoint)
        general_category = unicodedata.category(symbol)
        direction = unicodedata.bidirectional(symbol)
        # Unnamed code points get "" so the substring checks below are safe.
        label = unicodedata.name(symbol, "")
        lowered = label.lower()

        name_matches = any(word in lowered for word in suspicious_words)
        if not name_matches and general_category not in flagged_categories:
            continue

        bidi_dict[codepoint] = {
            "category": general_category,
            "name": label,
            "hex": hex(codepoint),
            "unicode": str(symbol),
            "bidi_category": direction,
        }

    print(f"Bidi dataset contains {len(bidi_dict)}")
def check_char(char):
    """Look up a single character in the flagged-character dataset.

    Args:
        char: A one-character string.

    Returns:
        The metadata dict stored in ``bidi_dict`` for the character's code
        point, or ``None`` when the character is not in the dataset.
    """
    codepoint = ord(char)
    return bidi_dict.get(codepoint)
def check_file(afile):
    """Scan one file and report every flagged character it contains.

    The file is decoded as UTF-8 (``utf-8-sig`` strips a leading BOM) with
    undecodable bytes dropped, so binary files can be scanned without a
    decoding error. Every character is looked up with ``check_char``; one
    line is printed per hit, or a single "doesn't contain" line when the
    file was read completely and nothing was flagged.

    A file that cannot be opened or read (missing, dangling symlink,
    permission denied, a directory, ...) is reported on stderr instead of
    raising, so one bad entry does not abort a whole directory scan.

    Args:
        afile: Path of the file to scan.

    Returns:
        True if at least one flagged character was found, False otherwise
        (including when the file could not be read).
    """
    hits = []
    read_failed = False
    try:
        # Built-in open() replaces codecs.open(), which is deprecated in
        # recent Python releases; decoding behaviour is the same here.
        with open(afile, encoding="utf-8-sig", errors="ignore") as infile:
            for line in infile:
                for char in line:
                    bidi_obj = check_char(char)
                    if bidi_obj:
                        hits.append(bidi_obj)
    except OSError as err:
        read_failed = True
        print(f"File {afile} could not be read: {err}", file=sys.stderr)

    # Report outside the try block so that an error while writing to stdout
    # is never mistaken for a failure to read the scanned file.
    for bidi_obj in hits:
        print(
            f"File {afile} contains unicode bi-directional character {bidi_obj}"
        )
    if not hits and not read_failed:
        print(f"File {afile} doesn't contain unicode bi-directional character")
    return bool(hits)
def main():
    """Command-line entry point: scan the file or directory given as argument.

    Parses the single positional argument ``f``, builds the character
    dataset, then scans either the one file or every file found by walking
    the directory tree. Exits with status 1 when the path is empty or does
    not exist.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("f", help="File to check for Bidi")
    options = cli.parse_args()

    build_dataset()

    target = options.f
    if not (target and os.path.exists(target)):
        print("Provide a file or directory to scan")
        sys.exit(1)

    if not os.path.isdir(target):
        check_file(target)
        return

    # Directory: visit every file in the tree rooted at the target.
    for folder, _subdirs, filenames in os.walk(target):
        for filename in filenames:
            check_file(os.path.join(folder, filename))
# Run the scan only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment