Skip to content

Instantly share code, notes, and snippets.

@ryantuck
Created May 28, 2019 20:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryantuck/695b23e286f9a802793e24ed553dee92 to your computer and use it in GitHub Desktop.
Save ryantuck/695b23e286f9a802793e24ed553dee92 to your computer and use it in GitHub Desktop.
"""
TruffleHog Results Parsing
Designed to operate on trufflehog's JSON output (one JSON object per line):
    $ trufflehog --json <my_repo> > trufflehog_output.json
Expects a `trufflehog_output.json` file and a `trufflehog_whitelist.yml` file
in the current working directory.
Whitelist config file should look like:
string_prefixes:
- not/bad/string
string_suffixes:
- endofnotbadstring
strings:
- averylongstringthatisnotbad
paths:
- path/to/file/containing/bunch/of/high/entropy/strings
By default, outputs each offending path followed by its offending strings:
    /path/number/1
        LONGSTRING
        LONGSTRING2
    /path/number/2
        LONGSTRING2
        LONGSTRING3
The idea here is:
1. Run with no whitelisted configs etc
2. Start digging through example diffs to see what you can whitelist
3. Add strings/paths/prefixes/suffixes to whitelist
4. Rinse and repeat until you've whittled your output down to bad strings
"""
# pylint: disable=invalid-name
import json
import yaml
ISSUES_FILEPATH = 'trufflehog_output.json'
WHITELIST_FILEPATH = 'trufflehog_whitelist.yml'
def read_issues(path=None):
    """
    Load trufflehog findings from a JSON-lines file.

    Each non-blank line of the file is parsed as one JSON document.

    Args:
        path: File to read. When omitted, the module-level
            ``ISSUES_FILEPATH`` is used, so existing zero-argument calls
            behave as before.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if path is None:
        path = ISSUES_FILEPATH
    with open(path) as f:
        # Iterate the file lazily instead of readlines(), and drop
        # whitespace-only lines: they carry no finding and would otherwise
        # abort the whole run with a JSONDecodeError.
        stripped_rows = (row.strip() for row in f)
        return [json.loads(row) for row in stripped_rows if row]
def _get_whitelist_section(section):
    """
    Return one top-level section of the whitelist YAML file.

    The file at ``WHITELIST_FILEPATH`` is re-read on every call.

    Args:
        section: Top-level key to look up (e.g. ``'strings'``).

    Returns:
        The value stored under ``section``, or an empty list when the file
        is empty, the key is absent, or the key has no value.

    Raises:
        OSError: If the whitelist file cannot be opened.
    """
    with open(WHITELIST_FILEPATH) as f:
        # safe_load instead of yaml.load: the whitelist is plain data, and
        # yaml.load without an explicit Loader raises TypeError on
        # PyYAML >= 6 (and can construct arbitrary objects on older versions).
        cfg = yaml.safe_load(f)
    # An empty file parses to None, and a key written with no entries
    # ("strings:") also maps to None; normalize both to an empty list so
    # callers can always iterate the result.
    return (cfg or {}).get(section) or []
def _parse_strings(strings):
    """
    Normalize a ``stringsFound`` value to a list.

    Args:
        strings: Either a list (returned unchanged, not copied) or a single
            str (wrapped in a one-element list).

    Returns:
        A list.

    Raises:
        TypeError: If ``strings`` is neither a list nor a str.
    """
    if isinstance(strings, list):
        return strings
    if isinstance(strings, str):
        return [strings]
    # TypeError is still an Exception subclass, so callers catching the
    # previous bare Exception keep working, but the failure now says what
    # was wrong instead of only echoing the value.
    raise TypeError(
        'stringsFound must be a list or str, got '
        f'{type(strings).__name__}: {strings!r}'
    )
def is_offending_string(string, prefixes=None, suffixes=None, strings=None):
    """
    Report whether ``string`` is NOT covered by any whitelist rule.

    Args:
        string: Candidate string to check.
        prefixes: Optional iterable of whitelisted prefixes.
        suffixes: Optional iterable of whitelisted suffixes.
        strings: Optional collection of whitelisted exact strings.

    Returns:
        False if ``string`` starts with a whitelisted prefix, ends with a
        whitelisted suffix, or is contained in ``strings``; True otherwise.
        Any whitelist argument left as None (or empty) matches nothing.
    """
    # Each whitelist is optional: the truthiness guard makes the None
    # defaults usable instead of raising TypeError on iteration / `in`.
    # startswith/endswith accept a tuple of alternatives, so one call
    # covers every prefix or suffix.
    if prefixes and string.startswith(tuple(prefixes)):
        return False
    if suffixes and string.endswith(tuple(suffixes)):
        return False
    if strings and string in strings:
        return False
    return True
def file_breakdowns():
    """
    Group non-whitelisted strings by the path they were found in.

    Reads the issues via ``read_issues()`` and the four whitelist sections
    (``string_prefixes``, ``string_suffixes``, ``strings``, ``paths``) via
    ``_get_whitelist_section()``.

    Returns:
        A dict ``{path: set_of_offending_strings}`` with keys inserted in
        sorted path order. Whitelisted paths, and paths whose strings were
        all whitelisted, are omitted.
    """
    data = read_issues()
    ok_prefixes = _get_whitelist_section('string_prefixes')
    ok_suffixes = _get_whitelist_section('string_suffixes')
    ok_strings = _get_whitelist_section('strings')
    # A set gives O(1) membership tests for the per-issue path check below.
    ok_paths = set(_get_whitelist_section('paths'))

    # Single pass over the issues, bucketing by path, instead of rescanning
    # every issue once per distinct path.
    found = {}
    for issue in data:
        path = issue['path']
        if path in ok_paths:
            continue
        bucket = found.setdefault(path, set())
        for s in _parse_strings(issue['stringsFound']):
            if is_offending_string(
                string=s,
                prefixes=ok_prefixes,
                suffixes=ok_suffixes,
                strings=ok_strings,
            ):
                bucket.add(s)

    # Keep sorted-path key order and drop paths left with no strings.
    return {path: found[path] for path in sorted(found) if found[path]}
def main():
    """
    Print each offending path followed by its non-whitelisted strings.

    Output goes to stdout: the path on its own line, then one line per
    string, each prefixed with a space.
    """
    for path, strings in file_breakdowns().items():
        print(path)
        # The strings arrive as a set, whose iteration order can change
        # between runs (str hashing is randomized per process). Sorting
        # makes the report stable and diffable across runs.
        for s in sorted(strings, key=str):
            print(f' {s}')
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment