Skip to content

Instantly share code, notes, and snippets.

@MrMoshkovitz
Last active May 1, 2024 20:55
Show Gist options
  • Save MrMoshkovitz/ccf586f4c74a3ae9137665976a25c412 to your computer and use it in GitHub Desktop.
Save MrMoshkovitz/ccf586f4c74a3ae9137665976a25c412 to your computer and use it in GitHub Desktop.
Github Dorks
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import github3 as github
import os
import argparse
import time
import feedparser
from copy import copy
from sys import stderr, prefix
# Credentials and endpoint come from the environment; each value is None when
# the corresponding variable is unset.
gh_user = os.environ.get('GH_USER')
gh_pass = os.environ.get('GH_PWD')
gh_token = os.environ.get('GH_TOKEN')
gh_url = os.environ.get('GH_URL')

# Setting GH_URL selects the GitHub Enterprise client; otherwise the default
# client is built from the same credentials.
if gh_url is not None:
    gh = github.GitHubEnterprise(
        url=gh_url, username=gh_user, password=gh_pass, token=gh_token)
else:
    gh = github.GitHub(username=gh_user, password=gh_pass, token=gh_token)
def search_wrapper(gen):
    """Yield items from ``gen``, pausing and retrying on search rate limits.

    Before each item is requested a shallow copy of the iterator is taken.
    When advancing ``gen`` raises ``github.exceptions.ForbiddenError``, the
    search rate-limit reset time is read from ``gh.rate_limit()``, the
    function sleeps until just after that time, and the request is retried
    on the copy. Iteration then continues from the copy.

    Args:
        gen: an iterator of search results (anything supporting ``next`` and
            ``copy.copy``).

    Yields:
        Each item produced by the underlying iterator.

    Raises:
        Any exception raised by the iterator other than ``StopIteration`` and
        ``ForbiddenError`` propagates unchanged.
    """
    while True:
        # Snapshot taken *before* advancing so a failed request can be replayed.
        gen_back = copy(gen)
        try:
            yield next(gen)
        except StopIteration:
            return
        except github.exceptions.ForbiddenError:
            search_rate_limit = gh.rate_limit()['resources']['search']
            reset_time = search_rate_limit['reset']
            # Clamp at zero: if the reset moment has already passed the
            # difference is negative and time.sleep() would raise ValueError.
            sleep_time = max(reset_time - int(time.time()) + 1, 0)
            stderr.write(
                'GitHub Search API rate limit reached. Sleeping for %d seconds.\n\n'
                % (sleep_time))
            time.sleep(sleep_time)
            try:
                item = next(gen_back)
            except StopIteration:
                # A StopIteration escaping a generator body is converted to
                # RuntimeError (PEP 479), so end the stream explicitly.
                return
            # Keep consuming from the iterator that succeeded rather than
            # from the one whose request failed.
            gen = gen_back
            yield item
def metasearch(repo_to_search=None,
               user_to_search=None,
               gh_dorks_file=None,
               active_monit=None,
               output_filename=None,
               refresh_time=60):
    """Dispatch to feed monitoring or to a one-off search.

    When ``active_monit`` is given (anything other than None), ``monit`` is
    started with the dorks file, that value and ``refresh_time``. Otherwise
    ``search`` is run once with the repo/user scope, dorks file and output
    filename.
    """
    if active_monit is not None:
        monit(gh_dorks_file, active_monit, refresh_time)
        return

    search(repo_to_search, user_to_search, gh_dorks_file, active_monit,
           output_filename)
def monit(gh_dorks_file=None, active_monit=None, refresh_time=60):
    """Poll the user's private GitHub Atom feed and scan on merged pull requests.

    The feed URL is built from the module-level ``gh_user`` and the feed
    token ``active_monit``. On every poll, each feed item whose title
    contains ``'merged pull'`` and has not been handled before triggers
    ``search`` for the item's author. The loop never returns.

    Args:
        gh_dorks_file: path to the dorks file, forwarded to ``search``.
        active_monit: token appended to the private feed URL.
        refresh_time: seconds to sleep between polls.

    Raises:
        Exception: if ``gh_user`` is None.
    """
    if gh_user is None:
        raise Exception('Error, env Github user variable needed')

    # The two sentences are separate literals; the trailing space keeps them
    # from running together in the printed banner.
    print(
        'Monitoring user private feed searching new code to be dorked. '
        'Every new merged pull request trigger user scan.'
    )
    print('-----')

    # Titles already handled; a set because it is only used for membership.
    seen_titles = set()
    gh_private_feed = "https://github.com/{}.private.atom?token={}".format(
        gh_user, active_monit)
    while True:
        feed = feedparser.parse(gh_private_feed)
        for item in feed['items']:
            title = item['title']
            if 'merged pull' in title and title not in seen_titles:
                search(
                    user_to_search=item['author_detail']['name'],
                    gh_dorks_file=gh_dorks_file)
                seen_titles.add(title)
        print('Waiting for new items...')
        time.sleep(refresh_time)
def search(repo_to_search=None,
           user_to_search=None,
           gh_dorks_file=None,
           active_monit=None,
           output_filename=None):
    """Run every dork from the dorks file through GitHub code search.

    Each non-empty line of the dorks file that does not start with ``#`` or
    ``;`` is used as a query, narrowed with ``repo:`` or ``user:`` when a
    scope is given (the repo takes precedence). Results are printed, or
    written as comma-separated lines when ``output_filename`` is set.

    Args:
        repo_to_search: ``owner/name`` of a repository to restrict the search to.
        user_to_search: user or organisation to restrict the search to.
        gh_dorks_file: path to the dorks file. When None, ``github-dorks.txt``
            is looked up in the current directory and then under
            ``<sys.prefix>/github-dorks/``.
        active_monit: unused here; kept for signature compatibility.
        output_filename: file to (over)write results to instead of printing.

    Raises:
        Exception: if no usable dorks file can be found.
    """
    if gh_dorks_file is None:
        for path_prefix in ['.', os.path.join(prefix, 'github-dorks/')]:
            filename = os.path.join(path_prefix, 'github-dorks.txt')
            if os.path.isfile(filename):
                gh_dorks_file = filename
                break

    # gh_dorks_file is still None when no default was found; checking it
    # first avoids os.path.isfile(None) raising TypeError instead of the
    # intended error below.
    if gh_dorks_file is None or not os.path.isfile(gh_dorks_file):
        raise Exception('Error, the dorks file path is not valid')

    if user_to_search:
        print("Scanning User: ", user_to_search)
    if repo_to_search:
        print("Scanning Repo: ", repo_to_search)

    # The scope qualifier does not depend on the dork, so compute it once.
    # Doing it up front also keeps the final message working when the dorks
    # file contains no active lines.
    addendum = ''
    if repo_to_search:
        addendum = ' repo:' + repo_to_search
    elif user_to_search:
        addendum = ' user:' + user_to_search

    found = False
    outputFile = open(output_filename, 'w') if output_filename else None
    try:
        with open(gh_dorks_file, 'r') as dork_file:
            # Write CSV Header
            # NOTE(review): fields are joined with ', ' and not quoted, so a
            # value that itself contains a comma will break the columns.
            if outputFile:
                outputFile.write('Issue Type (Dork), Text Matches, File Path, Score/Relevance, URL of File\n')
            for dork in dork_file:
                dork = dork.strip()
                # Skip blank lines and comment lines.
                if not dork or dork[0] in '#;':
                    continue
                dork = dork + addendum
                search_results = search_wrapper(gh.search_code(dork))
                try:
                    for search_result in search_results:
                        found = True
                        fmt_args = {
                            'dork': dork,
                            'text_matches': search_result.text_matches,
                            'path': search_result.path,
                            'score': search_result.score,
                            'url': search_result.html_url
                        }
                        # Either write to file or print output
                        if outputFile:
                            outputFile.write('{dork}, {text_matches}, {path}, {score}, {url}\n'.format(**fmt_args))
                        else:
                            result = '\n'.join([
                                'Found result for {dork}',
                                'Text matches: {text_matches}', 'File path: {path}',
                                'Score/Relevance: {score}', 'URL of File: {url}', ''
                            ]).format(**fmt_args)
                            print(result)
                except github.exceptions.GitHubError as e:
                    # A GitHub API error aborts the whole run.
                    print('GitHubError encountered on search of dork: ' + dork)
                    print(e)
                    return
                except Exception as e:
                    # Any other failure is reported and the next dork is tried.
                    print(e)
                    print('Error encountered on search of dork: ' + dork)
    finally:
        # Close the results file on every exit path, including the early
        # return above.
        if outputFile:
            outputFile.close()

    if not found:
        print('No results for your dork search' + addendum + '. Hurray!')
def main():
    """Parse the command line and hand the options to ``metasearch``.

    Exactly one of ``--user``, ``--repo`` or ``--monit`` must be supplied;
    ``--dork`` and ``--outputFile`` are optional.
    """
    arg_parser = argparse.ArgumentParser(
        description='Search github for github dorks',
        epilog='Use responsibly, Enjoy pentesting')
    arg_parser.add_argument(
        '-v', '--version', action='version', version='%(prog)s 0.1.1')

    # The search target: a user/org, a single repo, or feed monitoring.
    target_group = arg_parser.add_mutually_exclusive_group(required=True)
    target_group.add_argument(
        '-u', '--user', dest='user_to_search', action='store',
        help='Github user/org to search within. Eg: techgaun')
    target_group.add_argument(
        '-r', '--repo', dest='repo_to_search', action='store',
        help='Github repo to search within. Eg: techgaun/github-dorks')

    arg_parser.add_argument(
        '-d', '--dork', dest='gh_dorks_file', action='store',
        help='Github dorks file. Eg: github-dorks.txt')

    target_group.add_argument(
        '-m', '--monit', dest='active_monit', action='store',
        help='Monitors Github user private feed with feed token')

    arg_parser.add_argument(
        '-o', '--outputFile', dest='output_filename', action='store',
        help='CSV File to write results to. This overwrites the file provided! Eg: out.csv')

    options = arg_parser.parse_args()
    metasearch(
        repo_to_search=options.repo_to_search,
        user_to_search=options.user_to_search,
        gh_dorks_file=options.gh_dorks_file,
        active_monit=options.active_monit,
        output_filename=options.output_filename)


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
filename:.npmrc _auth
filename:.dockercfg auth
extension:md
filename:.npmrc _auth
filename:.dockercfg auth
extension:pem private
extension:ppk private
filename:id_rsa or filename:id_dsa
extension:sql mysql dump
extension:sql mysql dump password
filename:credentials aws_access_key_id
filename:.s3cfg
filename:wp-config.php
filename:.htpasswd
filename:.env DB_USERNAME NOT homestead
filename:.env MAIL_HOST=smtp.gmail.com
filename:.git-credentials
PT_TOKEN language:bash
filename:.bashrc password
filename:.bashrc mailchimp
filename:.bash_profile aws
rds.amazonaws.com password
extension:json api.forecast.io
extension:json mongolab.com
extension:yaml mongolab.com
jsforce extension:js conn.login
SF_USERNAME salesforce
filename:.tugboat NOT _tugboat
HEROKU_API_KEY language:shell
HEROKU_API_KEY language:json
filename:.netrc password
filename:_netrc password
filename:hub oauth_token
filename:robomongo.json
filename:filezilla.xml Pass
filename:recentservers.xml Pass
filename:config.json auths
filename:idea14.key
filename:config irc_pass
filename:connections.xml
filename:express.conf path:.openshift
filename:.pgpass
filename:proftpdpasswd
filename:ventrilo_srv.ini
[WFClient] Password= extension:ica
filename:server.cfg rcon password
JEKYLL_GITHUB_TOKEN
filename:.bash_history
filename:.cshrc
filename:.history
filename:.sh_history
filename:sshd_config
filename:dhcpd.conf
filename:prod.exs NOT prod.secret.exs
filename:prod.secret.exs
filename:configuration.php JConfig password
filename:config.php dbpasswd
filename:config.php pass
path:sites databases password
shodan_api_key language:python
shodan_api_key language:shell
shodan_api_key language:json
shodan_api_key language:ruby
filename:shadow path:etc
filename:passwd path:etc
extension:avastlic "support.avast.com"
filename:dbeaver-data-sources.xml
filename:sftp-config.json
filename:.esmtprc password
extension:json googleusercontent client_secret
HOMEBREW_GITHUB_API_TOKEN language:shell
xoxp OR xoxb
.mlab.com password
filename:logins.json
filename:CCCam.cfg
msg nickserv identify filename:config
filename:settings.py SECRET_KEY
filename:secrets.yml password
filename:master.key path:config
filename:deployment-config.json
filename:.ftpconfig
filename:.remote-sync.json
filename:sftp.json path:.vscode
filename:WebServers.xml
filename:jupyter_notebook_config.json
"api_hash" "api_id"
"https://hooks.slack.com/services/"
filename:github-recovery-codes.txt
filename:gitlab-recovery-codes.txt
filename:discord_backup_codes.txt
extension:yaml cloud.redislabs.com
extension:json cloud.redislabs.com

Github Dorks Repo

Github Dorks

GitHub Search is quite a powerful and useful feature that can be used to search for sensitive data in repositories. A collection of GitHub dorks can reveal sensitive personal and/or organizational information such as private keys, credentials, authentication tokens, etc. This list is intended to be useful for assessing security and performing pen-testing of systems.

GitHub Dork Search Tool

github-dork.py is a simple python tool that can search through your repository or your organization/user repositories. It's not a perfect tool at the moment but provides basic functionality to automate the search on your repositories against the dorks specified in the text file.

Installation

This tool uses github3.py to talk with GitHub Search API.

Clone this repository and run:

pip install .

Usage

GH_USER  - Environment variable to specify Github user
GH_PWD   - Environment variable to specify a password
GH_TOKEN - Environment variable to specify Github token
GH_URL   - Environment variable to specify GitHub Enterprise base URL

Some example usages are listed below:

github-dork.py -r techgaun/github-dorks                          # search a single repo

github-dork.py -u techgaun                                       # search all repos of a user

github-dork.py -u dev-nepal                                      # search all repos of an organization

GH_USER=techgaun GH_PWD=<mypass> github-dork.py -u dev-nepal     # search as authenticated user

GH_TOKEN=<github_token> github-dork.py -u dev-nepal              # search using auth token

GH_URL=https://github.example.com github-dork.py -u dev-nepal    # search a GitHub Enterprise instance

Limitations

  • Authenticated requests get a higher rate limit. But, since this tool waits for the api rate limit to be reset (which is usually less than a minute), it can be slightly slow.
  • Output formatting is not great. PR welcome
  • Handle rate limit and retry. PR welcome

Contribution

Please consider contributing dorks that can reveal potentially sensitive information on Github.

List of Dorks

I am not categorizing the dorks at the moment. Instead, I am just going to list them, each with a description. Many of the dorks can be modified to make the search more specific or more generic. You can see more options here.

Dork Description
filename:.npmrc _auth npm registry authentication data
filename:.dockercfg auth docker registry authentication data
extension:pem private private keys
extension:ppk private puttygen private keys
filename:id_rsa or filename:id_dsa private ssh keys
extension:sql mysql dump mysql dump
extension:sql mysql dump password mysql dump look for password; you can try varieties
filename:credentials aws_access_key_id might return false positives with dummy values
filename:.s3cfg might return false positives with dummy values
filename:wp-config.php wordpress config files
filename:.htpasswd htpasswd files
filename:.env DB_USERNAME NOT homestead laravel .env (CI, various ruby based frameworks too)
filename:.env MAIL_HOST=smtp.gmail.com gmail smtp configuration (try different smtp services too)
filename:.git-credentials git credentials store, add NOT username for more valid results
PT_TOKEN language:bash pivotaltracker tokens
filename:.bashrc password search for passwords, etc. in .bashrc (try with .bash_profile too)
filename:.bashrc mailchimp variation of above (try more variations)
filename:.bash_profile aws aws access and secret keys
rds.amazonaws.com password Amazon RDS possible credentials
extension:json api.forecast.io try variations, find api keys/secrets
extension:json mongolab.com mongolab credentials in json configs
extension:yaml mongolab.com mongolab credentials in yaml configs (try with yml)
jsforce extension:js conn.login possible salesforce credentials in nodejs projects
SF_USERNAME salesforce possible salesforce credentials
filename:.tugboat NOT _tugboat Digital Ocean tugboat config
HEROKU_API_KEY language:shell Heroku api keys
HEROKU_API_KEY language:json Heroku api keys in json files
filename:.netrc password netrc that possibly holds sensitive credentials
filename:_netrc password netrc that possibly holds sensitive credentials
filename:hub oauth_token hub config that stores github tokens
filename:robomongo.json mongodb credentials file used by robomongo
filename:filezilla.xml Pass filezilla config file with possible user/pass to ftp
filename:recentservers.xml Pass filezilla config file with possible user/pass to ftp
filename:config.json auths docker registry authentication data
filename:idea14.key IntelliJ Idea 14 key, try variations for other versions
filename:config irc_pass possible IRC config
filename:connections.xml possible db connections configuration, try variations to be specific
filename:express.conf path:.openshift openshift config, only email and server though
filename:.pgpass PostgreSQL file which can contain passwords
filename:proftpdpasswd Usernames and passwords of proftpd created by cpanel
filename:ventrilo_srv.ini Ventrilo configuration
[WFClient] Password= extension:ica WinFrame-Client information needed by users to connect to Citrix Application Servers
filename:server.cfg rcon password Counter Strike RCON Passwords
JEKYLL_GITHUB_TOKEN Github tokens used for jekyll
filename:.bash_history Bash history file
filename:.cshrc RC file for csh shell
filename:.history history file (often used by many tools)
filename:.sh_history korn shell history
filename:sshd_config OpenSSH server config
filename:dhcpd.conf DHCP service config
filename:prod.exs NOT prod.secret.exs Phoenix prod configuration file
filename:prod.secret.exs Phoenix prod secret
filename:configuration.php JConfig password Joomla configuration file
filename:config.php dbpasswd PHP application database password (e.g., phpBB forum software)
path:sites databases password Drupal website database credentials
shodan_api_key language:python Shodan API keys (try other languages too)
filename:shadow path:etc Contains encrypted passwords and account information of new unix systems
filename:passwd path:etc Contains user account information including encrypted passwords of traditional unix systems
extension:avastlic "support.avast.com" Contains license keys for Avast! Antivirus
filename:dbeaver-data-sources.xml DBeaver config containing MySQL Credentials
filename:.esmtprc password esmtp configuration
extension:json googleusercontent client_secret OAuth credentials for accessing Google APIs
HOMEBREW_GITHUB_API_TOKEN language:shell Github token usually set by homebrew users
xoxp OR xoxb Slack bot and private tokens
.mlab.com password MLAB Hosted MongoDB Credentials
filename:logins.json Firefox saved password collection (key3.db usually in same repo)
filename:CCCam.cfg CCCam Server config file
msg nickserv identify filename:config Possible IRC login passwords
filename:settings.py SECRET_KEY Django secret keys (usually allows for session hijacking, RCE, etc)
filename:secrets.yml password Usernames/passwords, Rails applications
filename:master.key path:config Rails master key (used for decrypting credentials.yml.enc for Rails 5.2+)
filename:deployment-config.json Created by sftp-deployment for Atom, contains server details and credentials
filename:.ftpconfig Created by remote-ssh for Atom, contains SFTP/SSH server details and credentials
filename:.remote-sync.json Created by remote-sync for Atom, contains FTP and/or SCP/SFTP/SSH server details and credentials
filename:sftp.json path:.vscode Created by vscode-sftp for VSCode, contains SFTP/SSH server details and credentials
filename:sftp-config.json Created by SFTP for Sublime Text, contains FTP/FTPS or SFTP/SSH server details and credentials
filename:WebServers.xml Created by Jetbrains IDEs, contains webserver credentials with encoded passwords (not encrypted!)
"api_hash" "api_id" Telegram API token
"https://hooks.slack.com/services/" Slack services URL often have secret API token as a suffix
filename:github-recovery-codes.txt GitHub recovery key
filename:gitlab-recovery-codes.txt GitLab recovery key
filename:discord_backup_codes.txt Discord recovery key
extension:yaml cloud.redislabs.com Redis credentials provided by Redis Labs found in a YAML file
extension:json cloud.redislabs.com Redis credentials provided by Redis Labs found in a JSON file
github3.py==1.0.0a2
feedparser==6.0.2
[flake8]
max-line-length = 120
from setuptools import setup

# Decode the README explicitly as UTF-8 so the build does not depend on the
# locale's default encoding.
with open('README.md', 'r', encoding='utf-8') as f:
    long_description = f.read()

# Package metadata. The script and the dorks list are installed as-is: the
# script via ``scripts`` and the list as a data file under ``github-dorks``.
setup(
    name='github-dorks',
    version='0.1',
    description='Find leaked secrets via github search.',
    license='Apache License 2.0',
    long_description=long_description,
    author='Samar Dhwoj Acharya (@techgaun)',
    long_description_content_type='text/markdown',
    scripts=['github-dork.py'],
    data_files=[('github-dorks', ['github-dorks.txt'])],
    install_requires=[
        'github3.py==1.0.0a2',
        'feedparser==6.0.2',
    ],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment