Skip to content

Instantly share code, notes, and snippets.

@filips123
Last active February 7, 2021 08:47
Show Gist options
  • Save filips123/eafe298f7b4f1bafefcf494d4ae26e51 to your computer and use it in GitHub Desktop.
Save filips123/eafe298f7b4f1bafefcf494d4ae26e51 to your computer and use it in GitHub Desktop.
ZeroArchive

ZeroArchive

Automatic mirroring of websites to ZeroNet.

Requirements

You have to install Python 3, ZeroNet and HTTrack on your computer.

You also have to create a new, empty ZeroNet site, which will be used as the archive destination. It's recommended that you use one ZeroNet site for each URL you want to archive.

Usage

Run main.py with these arguments:

  • website - Main website URL and additional HTTrack URLs

  • --depth - Link depth for HTTrack (default 5)

  • --zite - Path to ZeroNet site

  • --address - Address of ZeroNet site

  • --privkey - Private key of ZeroNet site

  • --zeronet - Path to ZeroNet installation (file)

  • --httrack - Path to HTTrack installation (file)

Examples

Example command for Windows with ZeroBundle:

python main.py --zite path/to/ZeroNet/data/12LkrG7Fsptiu4fGveknNwd6r92REdQU2S --address 12LkrG7Fsptiu4fGveknNwd6r92REdQU2S --privkey privateKey --zeronet path/to/ZeroNet/ZeroNet-cli --httrack /path/to/httrack/httrack https://example.com

Example ZeroArchive site:

http://127.0.0.1:43110/12LkrG7Fsptiu4fGveknNwd6r92REdQU2S
from urllib.parse import urlparse
from shutil import copyfile
import subprocess
import datetime
import argparse
import sys
import os
import re
def execute(*command, stdout=False, stderr=True):
    """Run an external command and return its exit status.

    Args:
        *command: Program path followed by its arguments. They are handed
            to ``subprocess.call`` as a sequence, so no shell is involved.
        stdout: If true, forward the child's standard output to
            ``sys.stdout``; otherwise discard it.
        stderr: If true, forward the child's standard error to
            ``sys.stderr``; otherwise discard it.

    Returns:
        The return code reported by ``subprocess.call``.
    """
    # subprocess.DEVNULL lets the subprocess module manage the null device,
    # so no file handle is opened here and none is left unclosed.
    out = sys.stdout if stdout else subprocess.DEVNULL
    err = sys.stderr if stderr else subprocess.DEVNULL
    return subprocess.call(command, stdout=out, stderr=err)
def _httrack_user_agent(httrack):
    """Build the User-Agent value: HTTrack's version banner plus the ZeroArchive tag."""
    try:
        result = subprocess.run(
            [httrack, '--version'],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
            timeout=30,
        )
        banner = result.stdout.strip().splitlines()
    except (OSError, ValueError, subprocess.SubprocessError):
        # Best effort only: a missing or misbehaving binary will surface
        # when the real mirroring command is executed.
        banner = []
    version = banner[0].strip() if banner else 'HTTrack'
    return version + '; ZeroArchive ()'


def download(httrack, depth, location, cache, url, *args):
    """Mirror a website with HTTrack and return HTTrack's exit status.

    Args:
        httrack: Path to the HTTrack executable.
        depth: Link depth, passed as ``--depth``.
        location: Directory that receives the mirror.
        cache: Second path given to ``-O`` after ``location``, separated
            by a comma.
        url: Main URL to mirror.
        *args: Extra command-line arguments appended verbatim (the caller
            passes the additional website URLs here).

    Returns:
        The status returned by ``execute`` for the HTTrack process.
    """
    cmd = [
        httrack, url,
        '-O', location + ',' + cache,
        '-I0',
        '--depth=' + str(depth),
        '--display=2',
        '--timeout=60',
        '--retries=99',
        '--sockets=7',
        '--connection-per-second=5',
        '--max-rate=250000',
        '--keep-alive',
        '--mirror',
        '--clean',
        '--robots=0',
        # The command runs without a shell, so a "$(httrack --version)"
        # substitution would be sent literally; resolve the version in
        # Python instead.
        '--user-agent', _httrack_user_agent(httrack),
        *args,
    ]
    return execute(*cmd)
# Non-greedy so that a page delivered on a single line (with further
# <title> elements, e.g. inside inline SVG) yields only the first title.
_TITLE_PATTERN = re.compile(r'<title>(.*?)</title>')


def _archived_page_path(url):
    """Return ``(hostname, path)`` of the entry page inside an archive directory."""
    parsed = urlparse(url)
    hostname = parsed.netloc.replace(':', '_')
    path = parsed.path.lstrip('/')
    if not path or path.endswith('/'):
        # "/" and directory-style URLs resolve to that directory's index page.
        path += 'index.html'
    elif not path.lower().endswith(('.html', '.htm')):
        path += '.html'
    return hostname, path


def _page_title(page, default='Website'):
    """Return the first ``<title>`` text found in ``page``, or ``default``."""
    # errors='replace' keeps the title scan from failing on pages that are
    # not valid UTF-8.
    with open(page, encoding='utf8', errors='replace') as source:
        for line in source:
            match = _TITLE_PATTERN.search(line)
            if match:
                return match.group(1)
    return default


def _archive_links(listing_path):
    """Return ``<li>`` links for every valid row of the archive list, newest first."""
    with open(listing_path) as listing:
        rows = listing.readlines()

    links = []
    for row in reversed(rows):
        # Split on the first comma only, so a comma inside the href survives.
        name, separator, href = row.strip().partition(',')
        if not separator:
            continue
        try:
            stamp = datetime.datetime.strptime(name, '%Y%m%d%H%M%S')
        except ValueError:
            # Rows whose name is not a timestamp are skipped.
            continue
        label = stamp.strftime('%Y-%m-%d %H:%M:%S')
        links.append('<li><a href="' + href + '">' + label + '</a></li>')
    return links


def navigation(zite, archive, url):
    """Regenerate the site's ``index.html`` listing all archives.

    Appends the new archive to ``archives.csv`` in ``zite``, reads the
    page title from the freshly mirrored entry page, and fills the
    placeholders of ``template.html`` (read from the current working
    directory) to produce ``index.html`` in ``zite``.

    Args:
        zite: Path to the ZeroNet site directory.
        archive: Path to the directory of the archive just created; its
            base name is recorded as the archive name.
        url: Original URL of the mirrored website.
    """
    # Read the template up front so a missing template fails before
    # anything is modified, and index.html is only written once complete.
    with open('template.html', 'r', encoding='utf8') as file:
        index = file.read()

    hostname, path = _archived_page_path(url)
    listing_path = os.path.join(zite, 'archives.csv')

    name = os.path.basename(os.path.normpath(archive))
    href = os.path.normpath(os.path.join(name, hostname, path)).replace('\\', '/')
    with open(listing_path, 'a') as archives:
        archives.write(name + ',' + href + '\n')

    title = _page_title(os.path.join(archive, hostname, path))

    index = index.replace('%SITE-ORIGINAL%', url)
    index = index.replace('%SITE-TITLE%', title)
    links = _archive_links(listing_path)
    index = index.replace('%SITE-ARCHIVES%', '\n' + '\n'.join(links) + '\n')

    with open(os.path.join(zite, 'index.html'), 'w', encoding='utf8') as file:
        file.write(index)
def sign(zeronet, address, privkey):
    """Invoke ZeroNet's ``siteSign`` command and return its exit status.

    Args:
        zeronet: Path to the ZeroNet executable.
        address: Address of the ZeroNet site.
        privkey: Private key of the ZeroNet site.
    """
    command = (zeronet, 'siteSign', address, privkey)
    return execute(*command)
def publish(zeronet, address):
    """Invoke ZeroNet's ``sitePublish`` command and return its exit status.

    Args:
        zeronet: Path to the ZeroNet executable.
        address: Address of the ZeroNet site.
    """
    command = (zeronet, 'sitePublish', address)
    return execute(*command)
def main():
    """Mirror a website into a ZeroNet site, then sign and publish it.

    Parses the command line, downloads the website into a new
    timestamped directory inside the site, rebuilds the navigation page,
    signs the site and publishes it. Prints the help text and exits with
    status 1 when called without arguments; exits with status 1 as soon
    as the download, signing or publishing step reports a non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog=__package__,
        description='Download website and publish it to ZeroNet'
    )

    parser.add_argument('website', type=str, nargs='+', help='website URLs to download')
    parser.add_argument('--depth', type=int, default=5, help='depth to clone')
    parser.add_argument('--zite', required=True, help='path to ZeroNet site')
    parser.add_argument('--address', required=True, help='address of ZeroNet site')
    parser.add_argument('--privkey', required=True, help='private key of ZeroNet site')
    parser.add_argument('--zeronet', required=True, help='path to ZeroNet installation')
    parser.add_argument('--httrack', required=True, help='path to HTTrack installation')

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    # Archive directories are named after the UTC time of the run; an aware
    # "now" replaces the deprecated utcnow() and formats identically.
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
    archive = os.path.join(args.zite, timestamp)

    print('Downloading website')
    st = download(args.httrack, args.depth, archive, os.path.join(args.zite, 'cache'), *args.website)
    if st:
        print('Error while downloading website', file=sys.stderr)
        sys.exit(1)

    print('Creating navigation page')
    navigation(args.zite, archive, args.website[0])

    print('Signing site')
    st = sign(args.zeronet, args.address, args.privkey)
    if st:
        print('Error while signing site', file=sys.stderr)
        sys.exit(1)

    print('Publishing site')
    # This step must publish the site; it previously signed it a second time.
    st = publish(args.zeronet, args.address)
    if st:
        print('Error while publishing site', file=sys.stderr)
        sys.exit(1)

    print('Done')
# Run only when executed as a script, so importing this module does not
# start a download.
if __name__ == '__main__':
    main()
<!DOCTYPE html>
<html>
<head>
  <!--
    Navigation page template. The script's navigation() step fills in the
    SITE-TITLE, SITE-ORIGINAL and SITE-ARCHIVES placeholders (each written
    between percent signs below). The placeholders are deliberately not
    spelled out in this comment, because every occurrence gets replaced.
  -->
  <title>ZeroArchive for %SITE-TITLE%</title>

  <meta charset="utf-8" />
  <meta http-equiv="content-type" content="text/html; charset=utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />

  <base href="" target="_top" id="base" />
  <!-- Derive the base URL from the current location so relative archive links resolve. -->
  <script>base.href = document.location.href.replace("/media", "").replace("index.html", "").replace(/[&?]wrapper=False/, "").replace(/[&?]wrapper_nonce=[A-Za-z0-9]+/, "")</script>

  <style type="text/css">
    body {
      background-color: #f0f0f2;
      margin: 0;
      padding: 0;
      font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    }

    div {
      width: 1500px;
      margin: 5em auto;
      padding: 50px;
      background-color: #fff;
      border-radius: 1em;
    }

    hr {
      border: 0;
      border-top: 1px solid #8c8c8c;
      border-bottom: 1px solid #fff;
    }

    a:link, a:visited {
      color: #38488f;
      text-decoration: none;
    }

    @media (max-width: 1500px) {
      body {
        background-color: #fff;
      }

      div {
        width: auto;
        margin: 0 auto;
        border-radius: 0;
        padding: 1em;
      }
    }
  </style>
</head>

<body>
  <div>
    <h1>ZeroArchive for %SITE-TITLE%</h1>

    <p>This site is ZeroArchive mirror for <a href="%SITE-ORIGINAL%">%SITE-TITLE%</a>.</p>
    <p>It uses <a href="https://www.httrack.com/">HTTrack</a> for mirroring websites and <a href="https://gist.github.com/filips123/eafe298f7b4f1bafefcf494d4ae26e51">ZeroArchive</a> for publishing them to ZeroNet. Please see ZeroArchive website if you want to create a mirror for your website.</p>

    <hr />

    <!-- A list may not be nested inside a paragraph, so the heading gets its own <p>. -->
    <p><strong>Available archives:</strong></p>
    <ul>%SITE-ARCHIVES%</ul>
  </div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment