Skip to content

Instantly share code, notes, and snippets.

@prabhu
Last active November 2, 2021 15:54
Show Gist options
  • Save prabhu/3effaab37e6bdcac6d37b7cc89b8be1c to your computer and use it in GitHub Desktop.
Save prabhu/3effaab37e6bdcac6d37b7cc89b8be1c to your computer and use it in GitHub Desktop.
Python: Look for Bi-directional unicode characters in a file or directory. CVE-2021-42574 / https://trojansource.codes/
#!/usr/bin/env python3
# Usage: python3 bidi-check.py <file or directory>
# This script looks for bidirectional Unicode characters. Useful for detecting CVE-2021-42574 / https://trojansource.codes/
import argparse
import codecs
import os
import sys
import unicodedata
bidi_dict = {}


def build_dataset():
    """Populate the module-level ``bidi_dict`` lookup table.

    Walks code points 255 through 0xFFFF and records every character whose
    Unicode name mentions "bracket", "paren" or "override", or whose general
    category is Ps, Pe or Cf. Entries are keyed by the integer code point.
    Prints the size of the table once the scan is complete.
    """
    name_markers = ("bracket", "paren", "override")
    flagged_categories = ("Ps", "Pe", "Cf")
    for code_point in range(255, 0x10000):
        char = chr(code_point)
        category = unicodedata.category(char)
        name = unicodedata.name(char, "")
        lowered = name.lower()
        name_hit = any(marker in lowered for marker in name_markers)
        if not name_hit and category not in flagged_categories:
            continue
        bidi_dict[code_point] = {
            "category": category,
            "name": name,
            "hex": hex(code_point),
            "unicode": char,
            "bidi_category": unicodedata.bidirectional(char),
        }
    print(f"Bidi dataset contains {len(bidi_dict)}")
def check_char(char):
    """Return the ``bidi_dict`` entry for *char*, or ``None`` if it has none."""
    code_point = ord(char)
    return bidi_dict.get(code_point)
def check_file(afile):
    """Scan one file and report every flagged character it contains.

    The file is decoded as UTF-8 (a leading BOM is dropped and undecodable
    bytes are skipped), then each character is looked up with ``check_char``.
    One line is printed per hit; if nothing is found a single "doesn't
    contain" line is printed instead.

    Args:
        afile: Path of the file to scan.

    Returns:
        True if at least one flagged character was found, otherwise False.
        If the file cannot be opened or read, a warning is written to stderr
        and whatever was found before the failure is returned.
    """
    found = False
    try:
        # Built-in open() replaces codecs.open(), deprecated since Python 3.14.
        # newline="" leaves line endings untranslated, as codecs.open() did.
        with open(
            afile, encoding="utf-8-sig", errors="ignore", newline=""
        ) as infile:
            for line in infile:
                for char in line:
                    bidi_obj = check_char(char)
                    if bidi_obj:
                        found = True
                        print(
                            f"File {afile} contains unicode bi-directional character {bidi_obj}"
                        )
    except OSError as err:
        # A broken symlink or unreadable file must not abort a directory scan,
        # and it must not be reported as clean either.
        print(f"Unable to read {afile}: {err}", file=sys.stderr)
        return found
    if not found:
        print(f"File {afile} doesn't contain unicode bi-directional character")
    return found
def main():
    """Command-line entry point: scan the given file or directory tree.

    Parses the positional ``f`` argument, builds the lookup table, then
    validates the path. A missing path prints a usage hint and exits with
    status 1. A directory is walked recursively and every file under it is
    checked; anything else is checked as a single file.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("f", help="File to check for Bidi")
    options = cli.parse_args()
    build_dataset()
    target = options.f
    if not (target and os.path.exists(target)):
        print("Provide a file or directory to scan")
        sys.exit(1)
    if not os.path.isdir(target):
        check_file(target)
        return
    for root, _subdirs, filenames in os.walk(target):
        for filename in filenames:
            check_file(os.path.join(root, filename))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment