Forked from tillson/gist:620e8ef87bc057f25b0a27c423433fda
Created
October 17, 2020 02:13
-
-
Save marz-hunter/08e16c84b90e5561f606e58c429d2b98 to your computer and use it in GitHub Desktop.
Decode Base64 strings in a git repo's commit history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scan a git repository's commit history for <text encoding="base64">...</text>
# elements and print each distinct decoded payload once.
import re
import base64

# Captures the payload between the opening and closing tags. Using a capture
# group avoids slicing the match by the length of a hand-written tag literal.
BASE64_TEXT_RE = re.compile(r"<text encoding=\"base64\">([^>]+)</text>")


def extract_new_payloads(source, seen):
    """Return the decoded payloads in *source* that are not yet in *seen*.

    Args:
        source: Text to search for <text encoding="base64"> elements.
        seen: Set of payloads already reported. It is updated in place with
            every payload returned by this call.

    Returns:
        A list of newly found payloads, in order of appearance. Each entry is
        the repr of the decoded bytes (for example "b'hello'"), which is the
        format the script prints.
    """
    fresh = []
    for payload in BASE64_TEXT_RE.findall(source):
        try:
            # repr() keeps arbitrary (possibly binary) payloads printable on
            # a single line; it is what str() returns for a bytes object.
            decoded = repr(base64.b64decode(payload))
        except ValueError:
            # binascii.Error is a ValueError: the payload is not valid
            # base64, so skip it instead of aborting the whole scan.
            continue
        if decoded not in seen:
            # Store exactly the value that is tested above; storing a
            # modified copy would make the membership test never succeed.
            seen.add(decoded)
            fresh.append(decoded)
    return fresh


def main(repo_path="./"):
    """Print every distinct base64 payload found in the history of *repo_path*.

    Only the pre-change content of each modification (source_code_before) is
    searched; modifications without such content are skipped.
    """
    # Imported here so extract_new_payloads stays importable without
    # pydriller installed.
    # NOTE(review): pydriller 2.x renamed RepositoryMining to Repository and
    # commit.modifications to commit.modified_files -- confirm the installed
    # version before upgrading.
    from pydriller import RepositoryMining

    seen = set()
    for commit in RepositoryMining(repo_path).traverse_commits():
        for mod in commit.modifications:
            if mod.source_code_before is None:
                continue
            for decoded in extract_new_payloads(mod.source_code_before, seen):
                print(decoded)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment