@telecastr
Created May 23, 2021 19:17
Dirty fix for anantshri/svn-extractor
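A minimal usage sketch (the script filename and target URL are placeholders; the flags match the argparse options defined in main() below):

    python svn_extractor.py --url http://target.example --match "\.php$" --exclude jpg,png,gif --userlist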
#!/usr/bin/env python
import requests
import sys
import argparse
import os
import sqlite3
import traceback
import re
# disable InsecureRequestWarning (certificate verification is intentionally skipped)
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
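# The helper functions below share module-level globals (show_debug,
# no_extract, excludes, author_list); main() initialises them before any
# helper runs.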
def getext(filename):
    name, ext = os.path.splitext(filename)
    return ext[1:]
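# readsvn walks the plain-text .svn/entries format used by SVN < 1.7. Entry
# names appear on the line *before* their marker, so the loop keeps the
# previous line in old_line: the line before "file"/"dir" is the entry name,
# and the line before "has-props" is treated as a committer username.
# Directories are recursed into via their own .svn/entries file.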
def readsvn(data, urli, match, proxy_dict):
    old_line = ""
    file_list = ""
    dir_list = ""
    user = ""
    pattern = re.compile(match, re.IGNORECASE)
    global author_list
    global excludes
    if not urli.endswith('/'):
        urli = urli + "/"
    for a in data.text.splitlines():
        # collect usernames: in the entries file the author line precedes "has-props"
        if a == "has-props":
            author_list.append(old_line)
        if a == "file":
            if not pattern.search(old_line):
                continue
            ignore = getext(old_line) in excludes
            if ignore:
                print('{}{} (not extracted)'.format(urli, old_line))
            else:
                print('{}{}'.format(urli, old_line))
            if no_extract and not ignore:
                save_url_svn(urli, old_line, proxy_dict)
            file_list = file_list + ";" + old_line
        if a == "dir":
            if old_line != "":
                folder_path = os.path.join("output", urli.replace("http://", "").replace("https://", "").replace("/", os.path.sep), old_line)
                if not os.path.exists(folder_path):
                    if no_extract:
                        os.makedirs(folder_path)
                dir_list = dir_list + ";" + old_line
                print('{}{}'.format(urli, old_line))
                try:
                    d = requests.get(urli + old_line + "/.svn/entries", verify=False, proxies=proxy_dict)
                    readsvn(d, urli + old_line, match, proxy_dict)
                except Exception:
                    print("Error reading {}{}/.svn/entries, skipping".format(urli, old_line))
        old_line = a
    return file_list, dir_list, user
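# readwc handles SVN >= 1.7, where working-copy metadata lives in a single
# SQLite database (.svn/wc.db); the downloaded wc.db is queried for file paths
# together with the location of their pristine copies under .svn/pristine/.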
def readwc(data, urli, match, proxy_dict):
    folder = os.path.join("output", urli.replace("http://", "").replace("https://", "").replace("/", os.path.sep))
    pattern = re.compile(match, re.IGNORECASE)
    global author_list
    global excludes
    if not folder.endswith(os.path.sep):
        folder = folder + os.path.sep
    with open(folder + "wc.db", "wb") as f:
        f.write(data.content)
    conn = sqlite3.connect(folder + "wc.db")
    c = conn.cursor()
    try:
        # checksums are stored as "$sha1$<hex>": substr(checksum, 7) strips the
        # "$sha1$" prefix, and its first two hex chars name the pristine subfolder
        c.execute('select local_relpath, ".svn/pristine/" || substr(checksum,7,2) || "/" || '
                  'substr(checksum,7) || ".svn-base" as alpha from NODES where kind="file";')
        list_items = c.fetchall()
        # collect all usernames that have committed at least once
        c.execute('select distinct changed_author from nodes;')
        author_list = [r[0] for r in c.fetchall()]
        c.close()
        for filename, url_path in list_items:
            if not pattern.search(filename):
                continue
            ignore = getext(filename) in excludes
            if ignore:
                print('{}{} (not extracted)'.format(urli, filename))
            else:
                print("{}{}".format(urli, filename))
            if no_extract and not ignore:
                save_url_wc(urli, filename, url_path, proxy_dict)
    except Exception:
        print("Error reading wc.db: either the database is corrupt or it is not a valid file")
        if show_debug:
            traceback.print_exc()
        return 1
    return 0
def show_list(_list, statement):
    print(statement)
    for cnt, x in enumerate(set(_list), start=1):
        print("{} : {}".format(cnt, x))
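# save_url_wc mirrors the remote path under output/ and downloads the pristine
# copy referenced by wc.db (svn_path); svn_path is None for directory entries,
# in which case only the local folder is created.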
def save_url_wc(url, filename, svn_path, proxy_dict):
    global author_list
    if filename != "":
        if svn_path is None:
            # directory entry: just mirror the folder locally
            folder_path = os.path.join("output", url.replace("http://", "").replace("https://", "").replace("/", os.path.sep), filename.replace("/", os.path.sep))
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)
        else:
            folder = os.path.join("output", url.replace("http://", "").replace("https://", "").replace("/", os.path.sep), os.path.dirname(filename).replace("/", os.path.sep))
            if not os.path.exists(folder):
                os.makedirs(folder)
            if not folder.endswith(os.path.sep):
                folder = folder + os.path.sep
            try:
                r = requests.get(url + "/" + svn_path, verify=False, proxies=proxy_dict)
                with open(folder + os.path.basename(filename), "wb") as f:
                    f.write(r.content)
            except Exception:
                print("Error while accessing: {}{}".format(url, svn_path))
                if show_debug:
                    traceback.print_exc()
    return 0
def save_url_svn(url, filename, proxy_dict):
    global author_list
    folder = os.path.join("output", url.replace("http://", "").replace("https://", "").replace("/", os.path.sep))
    if not folder.endswith(os.path.sep):
        folder = folder + os.path.sep
    try:
        # SVN < 1.7 keeps pristine copies under .svn/text-base/<name>.svn-base
        r = requests.get(url + "/.svn/text-base/" + filename + ".svn-base", verify=False, proxies=proxy_dict)
        if not os.path.isdir(folder + filename):
            with open(folder + filename, "wb") as f:
                f.write(r.content)
    except Exception:
        print("Problem saving {}".format(url + "/.svn/text-base/" + filename + ".svn-base"))
        if show_debug:
            traceback.print_exc()
    return 0
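# Overall flow in main(): probe the target URL, then try wc.db (SVN >= 1.7)
# unless --entries was given, and fall back to the .svn/entries file
# (SVN < 1.7) unless --wcdb was given.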
def main(argv):
    # globals shared with the helper functions above
    global show_debug
    global no_extract
    global excludes
    global author_list
    author_list = []
    desc = """This program extracts hidden SVN files from a web host, provided
either the .svn/entries file (SVN < 1.7)
or wc.db (SVN >= 1.7) is available online.
It automates the directory navigation and text extraction process."""
    epilog = """Credit (C) Anant Shrivastava http://anantshri.info. Contributions from orf, sullo, paddlesteamer.
Greets to Amol Naik, Akash Mahajan, Prasanna K, Lava Kumar for valuable inputs"""
    parser = argparse.ArgumentParser(description=desc, epilog=epilog)
    parser.add_argument("--url", help="Provide URL", dest='target', required=True)
    parser.add_argument("--debug", help="Provide debug information", action="store_true")
    # store_false means no_extract defaults to True (extract files);
    # passing --noextract flips it to False (only list the content)
    parser.add_argument("--noextract", help="Don't extract files, just show content", action="store_false")
    parser.add_argument("--userlist", help="show the usernames used for commits", action="store_true")
    parser.add_argument("--wcdb", help="check only wc.db", action="store_true")
    parser.add_argument("--entries", help="check only the .svn/entries file", action="store_true")
    parser.add_argument("--proxy", help="Provide HTTP proxy in http(s)://host:port format", dest='proxy', required=False)
    parser.add_argument("--match", help="only download files that match the regex")
    parser.add_argument("--exclude", help="exclude files with extensions separated by ','", dest='excludes', default='')
    x = parser.parse_args()
    url = x.target
    no_extract = x.noextract
    show_debug = x.debug
    match = x.match
    excludes = x.excludes.split(',')
    prox = x.proxy
    if prox is not None:
        print(prox)
        print("Proxy defined")
        proxy_dict = {"http": prox, "https": prox}
    else:
        print("Proxy not defined")
        proxy_dict = ""
    if match:
        print("Only downloading matches to {}".format(match))
        match = "(" + match + "|entries$|wc.db$)"  # the entries file and wc.db themselves must still match
    else:
        match = ""
    if x.wcdb and x.entries:
        print("Checking both wc.db and .svn/entries (default behaviour, no need to specify both switches)")
        x.wcdb = False
        x.entries = False
    if url is None:
        exit()
    print(url)
    #if not url.endswith('/'):
    #    url = url + "/"
    print("Checking if URL is correct")
    try:
        r = requests.get(url, verify=False, proxies=proxy_dict)
    except Exception as e:
        print("Problem connecting to URL")
        print(e)
        if show_debug:
            traceback.print_exc()
        exit()
    # 403/500 still suggest the host is alive, so keep probing in those cases too
    if r.status_code in (200, 403, 500):
        print("URL is active")
        if no_extract:
            folder_path = os.path.join("output", url.replace("http://", "").replace("https://", "").replace("/", os.path.sep))
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)
        if not x.entries:
            print("Checking for presence of wc.db")
            r = requests.get(url + "/.svn/wc.db", verify=False, allow_redirects=False, proxies=proxy_dict)
            if r.status_code == 200:
                print("wc.db found")
                rwc = readwc(r, url, match, proxy_dict)
                if rwc == 0:
                    if x.userlist:
                        show_list(author_list, "List of usernames used to commit in svn:")
                    exit()
            else:
                if show_debug:
                    print("Status code returned: {}".format(r.status_code))
                    print("Full response")
                    print(r.text)
                print("wc.db lookup FAILED")
        if not x.wcdb:
            print("Let's see if we can find .svn/entries")
            # disable redirects so a redirect-based 200 OK is not treated as a hit
            r = requests.get(url + "/.svn/entries", verify=False, allow_redirects=False, proxies=proxy_dict)
            if r.status_code == 200:
                print("SVN entries found; if no files are listed, check wc.db too")
                readsvn(r, url, match, proxy_dict)
                if 'author_list' in globals() and x.userlist:
                    show_list(author_list, "List of usernames used to commit in svn:")
                exit()
            else:
                if show_debug:
                    print("Status code returned: {}".format(r.status_code))
                    print("Full response")
                    print(r.text)
                print(".svn/entries lookup FAILED")
                print("{} doesn't contain any SVN repository".format(url))
    else:
        print("URL returns {}".format(r.status_code))
        exit()


if __name__ == "__main__":
    main(sys.argv[1:])