Skip to content

Instantly share code, notes, and snippets.

@josch
Created October 12, 2012 19:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save josch/3880904 to your computer and use it in GitHub Desktop.
Save josch/3880904 to your computer and use it in GitHub Desktop.
Download and sanitize Debian Packages and Sources files from snapshot.debian.org
#!/bin/sh
# Download the Debian Packages and Sources index files from
# snapshot.debian.org for every 5th day since 2005-03-12 (the first date
# requested from the archive) up to, but not including, today.
#
# Files are written to the current directory as
#   $dist-$arch-packages-YYYYMMDD.bz2   and   $dist-sources-YYYYMMDD.bz2
# A file that is already present is not downloaded again.
dist=sid
arch=i386

# list_snapshot_dates PYTHON
#   PYTHON  path of the Python interpreter to run
# Prints one YYYYMMDD date per line, in steps of 5 days.  The embedded
# program only uses syntax accepted by both Python 2 and Python 3.  The
# here-document delimiter is quoted so the shell passes the text verbatim.
list_snapshot_dates() {
  "$1" - << 'END'
from datetime import date, timedelta

day = date(2005, 3, 12)
today = date.today()
while day < today:
    print(day.strftime("%Y%m%d"))
    day += timedelta(days=5)
END
}

# fetch_index URL DEST
#   URL   location of the compressed index on snapshot.debian.org
#   DEST  local file name
# Does nothing when DEST already exists.  The download goes to DEST.part
# first and is renamed only after wget succeeded: "wget -O" creates or
# truncates its output file even when the transfer fails, and such an empty
# file would otherwise be mistaken for a finished download on the next run.
# Returns non-zero (after a warning on stderr) when the download failed.
fetch_index() {
  [ -f "$2" ] && return 0
  if wget --quiet -O "$2.part" "$1" && mv -f -- "$2.part" "$2"; then
    return 0
  fi
  rm -f -- "$2.part"
  printf 'warning: could not download %s\n' "$1" >&2
  return 1
}

python_bin=$(command -v python3 || command -v python)
if [ -z "$python_bin" ]; then
  printf 'warning: no python interpreter found, nothing will be downloaded\n' >&2
  dates=
else
  dates=$(list_snapshot_dates "$python_bin")
fi

# $dates holds digit-only words, so relying on word splitting is safe here.
for d in $dates; do
  echo "$d"
  base=http://snapshot.debian.org/archive/debian/${d}T000000Z/dists/$dist/main
  fetch_index "$base/binary-$arch/Packages.bz2" "$dist-$arch-packages-$d.bz2"
  fetch_index "$base/source/Sources.bz2" "$dist-sources-$d.bz2"
done
# Repair known syntax errors in the downloaded Packages/Sources indices.
# Every fix targets one specific broken stanza that appeared in the archive
# during the date range selected by the file globs.

# sanitize PATTERN SED_SCRIPT FILE...
#   PATTERN     extended regular expression (grep -E); a file is rewritten
#               only if one of its lines matches
#   SED_SCRIPT  sed script applied to the decompressed content
#   FILE...     bzip2-compressed candidates; names that are not regular
#               files (e.g. a glob that matched nothing) are skipped
# grep and sed run with LC_ALL=C so that patterns are matched byte-wise,
# independent of the caller's locale.  The repaired file is built under a
# temporary name and renamed over the original only after every step
# succeeded, so a failure never leaves a truncated index behind.
# Returns non-zero if at least one file could not be processed.
sanitize() {
  sanitize_pattern=$1
  sanitize_script=$2
  shift 2
  sanitize_status=0
  for sanitize_file in "$@"; do
    [ -f "$sanitize_file" ] || continue
    sanitize_tmp=${sanitize_file%.bz2}.$$
    if ! bzip2 -dc -- "$sanitize_file" > "$sanitize_tmp.plain"; then
      printf 'warning: cannot decompress %s, left untouched\n' "$sanitize_file" >&2
      sanitize_status=1
    elif LC_ALL=C grep -E -q -e "$sanitize_pattern" < "$sanitize_tmp.plain"; then
      if ! {
        LC_ALL=C sed -e "$sanitize_script" < "$sanitize_tmp.plain" > "$sanitize_tmp.fixed" &&
          bzip2 -c < "$sanitize_tmp.fixed" > "$sanitize_tmp.new" &&
          mv -f -- "$sanitize_tmp.new" "$sanitize_file"
      }; then
        printf 'warning: cannot rewrite %s, left untouched\n' "$sanitize_file" >&2
        sanitize_status=1
      fi
    fi
    rm -f -- "$sanitize_tmp.plain" "$sanitize_tmp.fixed" "$sanitize_tmp.new"
  done
  return "$sanitize_status"
}

# sanitize_all
# Runs the twelve fixes in order on the index files in the current directory,
# printing "N/12" before each one.  The globs are deliberately unquoted.
# Returns non-zero if any file could not be processed.
sanitize_all() {
  sanitize_all_status=0

  echo 1/12
  # version numbers cannot contain special characters; here a stray 0xC2
  # byte follows the tilde:
  # Replaces: icedove (<< 2.0~<0xC2>)
  # printf builds the byte from its octal code (\302) so that neither
  # grep -P nor sed's \xHH escapes are needed.
  sanitize \
    "$(printf 'Replaces: icedove \\(<< 2.0~\302\\)')" \
    "$(printf 's/Replaces: icedove (<< 2.0~\302)/Replaces: icedove (<< 2.0)/')" \
    *packages-200706??.bz2 || sanitize_all_status=1

  echo 2/12
  # version numbers cannot contain braces and dollar signs like this:
  # Depends: syncekonnector (= ${Source-Version])
  sanitize \
    'syncekonnector \(= \$\{Source-Version]\),' \
    's/syncekonnector (= ${Source-Version]),/syncekonnector,/' \
    *packages-200603??.bz2 || sanitize_all_status=1

  echo 3/12
  # version numbers cannot be only plus signs like this:
  # libghc6-hsql-dev (<< ++)
  sanitize \
    'libghc6-hsql-dev \(<< \+\+\),' \
    's/libghc6-hsql-dev (<< ++),/libghc6-hsql-dev,/' \
    *packages-200608??.bz2 || sanitize_all_status=1

  echo 4/12
  # version numbers cannot be <none> or <none>+~ like this:
  # python-sip4 (>= <none>), python-sip4 (<< <none>+~)
  sanitize \
    'python-sip4 \(>= <none>\), python-sip4 \(<< <none>\+~\)' \
    's/python-sip4 (>= <none>), python-sip4 (<< <none>+~)/python-sip4/' \
    *packages-201003??.bz2 *packages-201004??.bz2 *packages-201005??.bz2 ||
    sanitize_all_status=1

  echo 5/12
  # version constraints start with a single opening parenthesis, not with
  # two like this:
  # libsdl1.2-dev ((>=1.2.7+1.2.8cvs20041007-5.2)
  sanitize \
    'libsdl1.2-dev \(\(>=1.2.7\+1.2.8cvs20041007-5.2\),' \
    's/libsdl1.2-dev ((>=1.2.7+1.2.8cvs20041007-5.2),/libsdl1.2-dev (>=1.2.7+1.2.8cvs20041007-5.2),/' \
    *sources-200508??.bz2 || sanitize_all_status=1

  echo 6/12
  # version numbers cannot contain braces and dollar signs like this:
  # Build-Conflicts: libxosd-dev (<< ${Source-Version})
  sanitize \
    'libxosd-dev \(<< \$\{Source-Version\}\)' \
    's/libxosd-dev (<< ${Source-Version})/libxosd-dev/' \
    *sources-2007????.bz2 *sources-2008????.bz2 || sanitize_all_status=1

  echo 7/12
  # conflicts cannot be disjunctions like this:
  # Build-Conflicts-Indep: gs-afpl | gs-gpl (= 8.01-1)
  sanitize \
    'Build-Conflicts-Indep: gs-afpl \| gs-gpl \(= 8.01-1\),' \
    's/Build-Conflicts-Indep: gs-afpl | gs-gpl (= 8.01-1),/Build-Conflicts-Indep: gs-afpl, gs-gpl (= 8.01-1),/' \
    *sources-2005????.bz2 *sources-2006????.bz2 *sources-2007????.bz2 ||
    sanitize_all_status=1

  echo 8/12
  # version numbers cannot start with a point like this:
  # libvte-dev (>= .10.26)
  sanitize \
    'libvte-dev \(>= .10.26\),' \
    's/libvte-dev (>= .10.26),/libvte-dev (>= 0.10.26),/' \
    *sources-200503??.bz2 *sources-200504??.bz2 *sources-200505??.bz2 ||
    sanitize_all_status=1

  echo 9/12
  # architectures are not separated by commas like this:
  # expect (>= 5.38.0) [!hppa, !hurd-i386]
  sanitize \
    'expect \(>= 5.38.0\) \[!hppa, !hurd-i386\],' \
    's/expect (>= 5.38.0) \[!hppa, !hurd-i386\],/expect (>= 5.38.0) [!hppa !hurd-i386],/' \
    *sources-200505??.bz2 *sources-200506??.bz2 || sanitize_all_status=1

  echo 10/12
  # architectures are not separated by commas like this:
  # Architecture: kfreebsd-i386, i386
  sanitize \
    'Architecture: kfreebsd-i386, i386' \
    's/Architecture: kfreebsd-i386, i386/Architecture: kfreebsd-i386 i386/' \
    *sources-2005????.bz2 *sources-2006????.bz2 || sanitize_all_status=1

  echo 11/12
  # conflicts cannot be disjunctions like this:
  # Build-Conflicts: ghostscript [hppa sparc] | gs [hppa sparc]
  sanitize \
    'Build-Conflicts: ghostscript \[hppa sparc\] \| gs \[hppa sparc\]' \
    's/Build-Conflicts: ghostscript \[hppa sparc\] | gs \[hppa sparc\]/Build-Conflicts: ghostscript [hppa sparc], gs [hppa sparc]/' \
    *sources-200804??.bz2 || sanitize_all_status=1

  echo 12/12
  # an architecture list does not contain a parenthesis in the end like this:
  # [i386 m68k mips mipsel powerpc s390 alpha amd64 armel hppa ia64 ppc64 s390x sparc)]
  sanitize \
    'sparc\)]' \
    's/sparc)]/sparc]/' \
    *sources-201004??.bz2 *sources-201005??.bz2 || sanitize_all_status=1

  return "$sanitize_all_status"
}

sanitize_all
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment