Forked from tillson/gist:620e8ef87bc057f25b0a27c423433fda
Created
May 25, 2020 13:59
-
-
Save shamrocksu88/89a938e64a75551b5a70d5f03f66daa7 to your computer and use it in GitHub Desktop.
Decode Base64 strings in a git repo's commit history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pydriller import RepositoryMining | |
import re | |
import base64 | |
# Matches one <text encoding="base64">...</text> element and captures its
# payload. The payload class is kept as "[^>]+" so the same spans match as
# before; capturing it avoids slicing by hard-coded tag lengths.
_BASE64_TEXT_RE = re.compile(r'<text encoding="base64">([^>]+)</text>')


def find_base64_texts(sources):
    """Yield each distinct decoded Base64 payload found in *sources*.

    Args:
        sources: Iterable of file contents (``str``) to scan. ``None``
            entries are skipped.

    Yields:
        ``str(bytes)`` of every decoded payload (e.g. ``"b'hello'"``), in
        first-seen order, each value at most once.
    """
    seen = set()
    for source in sources:
        if source is None:
            continue
        for payload in _BASE64_TEXT_RE.findall(source):
            try:
                decoded = str(base64.b64decode(payload))
            except ValueError:
                # binascii.Error (bad padding) subclasses ValueError, and
                # non-ASCII input raises ValueError directly. One malformed
                # payload must not abort the whole history scan.
                continue
            if decoded in seen:
                continue
            # Store exactly the value that is tested above; storing a
            # variant (such as with a trailing newline) defeats the check.
            seen.add(decoded)
            yield decoded


def main(repo_path="./"):
    """Print every distinct Base64 payload from the repo's commit history.

    Scans the pre-change contents of each modification in each commit of the
    repository at *repo_path*.
    """
    # NOTE(review): RepositoryMining / .modifications are the names this file
    # imports and uses; newer PyDriller releases may have renamed them --
    # confirm against the installed version.
    before_versions = (
        mod.source_code_before
        for commit in RepositoryMining(repo_path).traverse_commits()
        for mod in commit.modifications
    )
    for decoded in find_base64_texts(before_versions):
        print(decoded)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment