vjt (owner)

Revisions

gist: 61474 Download_button fork
public
Description:
Opensource.org mirror script
Public Clone URL: git://gist.github.com/61474.git
Embed All Files: show embed
bin/update_opensource_mirror.sh #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
# Opensource.ORG quick&dirty mirror script.
# (C) 2007 Marcello Barnaba <vjt@openssl.it>
# Released under the terms of the DWTFYW License.
#
# Rebuilds a static copy of $MIRROR_URI inside $MIRROR_BASE:
#   1. an empty work directory is created,
#   2. the live mirror is replaced by a symlink to the placeholder directory,
#   3. the site is downloaded from scratch into the work directory,
#   4. the work directory is renamed into place as the new mirror.
# Any failing command aborts the run (set -e). In that case the placeholder
# stays online and the partial download is removed again.

# The absolute base path of the htdocs directory
MIRROR_BASE="/home/httpd/antifork.org/htdocs"

# The directory under which the contents will be downloaded
MIRROR_DIR="opensource.antifork.org"

# The URI to mirror
MIRROR_URI="http://opensource.org"

# A directory containing a placeholder page to show while
# the script is running
WIP_DIR="${MIRROR_DIR}-updating"

# A Temporary working directory
WORK_DIR="${MIRROR_DIR}-$RANDOM"

# The path to the wget(1) program, followed by its default options
WGET="/usr/bin/wget --quiet"

# Abort on errors .. yuck!
set -e

# .................................................
# NO USER SERVICEABLE PARTS BELOW THE DOTTED LINE .
# .................................................

# $WGET holds the program and its options in a single string; split it into
# an array once, so that every invocation below can be fully quoted.
read -r -a wget_cmd <<< "$WGET"

# Prints $1 with empty, "." and ".." segments resolved and without a leading
# slash. Returns 1 (printing nothing) if the path climbs above its starting
# directory. The paths handled here come from downloaded pages, so this keeps
# them from pointing outside the work directory.
normalize_relpath() {
    local segment
    local -a parts=() stack=()
    IFS=/ read -r -a parts <<< "$1"
    for segment in "${parts[@]}"; do
        case "$segment" in
            ''|.) ;;
            ..)
                (( ${#stack[@]} > 0 )) || return 1
                unset "stack[$(( ${#stack[@]} - 1 ))]"
                ;;
            *) stack+=("$segment") ;;
        esac
    done
    local IFS=/
    printf '%s\n' "${stack[*]}"
}

pushd "$MIRROR_BASE" > /dev/null

# Create the work directory before touching the live mirror: if the name
# is already taken, the run aborts while the old mirror is still online.
mkdir -- "$WORK_DIR"

# From here on, a failed run must not leave a half-downloaded tree behind.
# The path is absolute because the trap may fire from any directory.
work_path="$PWD/$WORK_DIR"
trap 'rm -rf -- "${work_path:?}"' EXIT

# wget madness downloads robots.txt continuously,
# so erase and re-download every time. for this kind
# of site, it's ok.
#
rm -rf -- "${MIRROR_DIR:?}"
ln -s -- "$WIP_DIR" "$MIRROR_DIR"

# Download the HTML and IMG stuff ..
#
"${wget_cmd[@]}" --domains=opensource.org --convert-links --level=0 \
    --mirror --page-requisites --no-host-directories \
    --exclude-directories '/user,/event' \
    --directory-prefix="$WORK_DIR" --html-extension \
    "$MIRROR_URI"

# Get all the stylesheets from the ToS page, one that will never
# disappear, hopefully. Only lines that really contain an @import "..."
# are printed (sed -n ... p); anything else mentioning "css" is ignored.
#
tos_page="$WORK_DIR/ToS.html"
STYLESHEETS=""
if [[ -r "$tos_page" ]]; then
    STYLESHEETS=$(sed -n '/css/s#.*import "\([a-z0-9/\.-]*\)".*#\1#p' "$tos_page")
else
    printf 'warning: %s is missing, no stylesheet images will be fetched\n' \
        "$tos_page" >&2
fi

# Download all the images referenced in the CSS style sheets
#
while IFS= read -r stylesheet; do
    # Skips the empty line produced when no stylesheet was found, and any
    # path that tries to climb out of the mirror root.
    stylesheet=$(normalize_relpath "$stylesheet") || continue
    [[ -n "$stylesheet" ]] || continue

    base=$(dirname -- "$stylesheet")
    mkdir -p -- "$WORK_DIR/$base"
    "${wget_cmd[@]}" -O "$WORK_DIR/$stylesheet" "$MIRROR_URI/$stylesheet"

    # One url(...) per output line, with the wrapper, surrounding blanks
    # and optional quotes stripped; duplicates are fetched only once.
    grep -o 'url([^)]*)' "$WORK_DIR/$stylesheet" \
        | sed -e 's/^url(//' -e 's/)$//' \
              -e "s/^[[:space:]\"']*//" -e "s/[[:space:]\"']*\$//" \
        | sort -u \
        | while IFS= read -r image; do
            case "$image" in
                # Nothing on this site to fetch for these forms.
                ''|\#*|data:*|*://*|//*) continue ;;
                # Root-relative: resolved against the site root.
                /*) target=$image ;;
                # Everything else is relative to the stylesheet's directory.
                *)  target="$base/$image" ;;
            esac
            target=$(normalize_relpath "$target") || continue
            [[ -n "$target" ]] || continue

            # wget -O does not create missing directories by itself.
            mkdir -p -- "$(dirname -- "$WORK_DIR/$target")"
            "${wget_cmd[@]}" -O "$WORK_DIR/$target" "$MIRROR_URI/$target"
        done
done <<< "$STYLESHEETS"

# The download is complete: keep it even if the final swap fails, so it
# can still be put in place by hand.
trap - EXIT

# Remove the placeholder page
rm -f -- "$MIRROR_DIR"

# Put the mirror online
mv -- "$WORK_DIR" "$MIRROR_DIR"

popd > /dev/null

# EOF
tmp/placeholder.html #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
<!DOCTYPE html>
<!-- Static notice shown to visitors while the mirror is being refreshed.
     NOTE(review): presumably this is the page kept in the mirror script's
     placeholder directory; confirm against the deployment. -->
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>The mirror is currently updating. Please wait.</title>
  </head>
  <body>
    <h2>The mirror is currently resyncing to the master site.</h2>
    <h3>The process will be completed in a matter of minutes.</h3>
    <p>In the meantime, try to visit the <a href="http://opensource.org">original site</a> or, if it's unreachable, try one of the following mirrors:</p>
    <p><a href="http://opensource.gds.tuwien.ac.at/">Austria</a> | <a href="http://os.fsfmirror.com/">Belgium</a> | <a href="http://opensource.usrbinruby.net/">Canada</a> (<a href="http://open2.mirrors-r-us.net/">2</a>) | <a href="http://opensource.mirrors.typhon.net/">France</a> (<a href="http://os3.fsfmirror.com/">2</a>) | <a href="http://opensource.mirroring.de/">Germany</a> (<a href="http://opensource.linux-mirror.org/">2</a>,<a href="http://os2.fsfmirror.com/">3</a>,<a href="http://opensource.erde3.net/">4</a>) | <a href="http://open3.mirrors-r-us.net">Japan</a> (<a href="http://open1.mirrors-r-us.net/">2</a>) | <a href="http://os3.osmirror.com/">Singapore</a> | <a href="http://opensource.openmirrors.org">UK</a> (<a href="http://2opensource.openmirrors.org">2</a>) | USA: <a href="http://www.free-soft.org/mirrors/www.opensource.org/">LA</a>, <a href="http://osmirror.com/">Montana</a> (<a href="http://os2.osmirror.com/">2</a>), (<a href="http://opensource2.usrbinruby.net/">3</a>)</p>

    <p style="margin-top: 50px; font-style:italic">Thanks for visiting opensource.antifork.org!</p>
  </body>
</html>