Skip to content

Instantly share code, notes, and snippets.

@smoser
Last active October 27, 2016 20:07
Show Gist options
  • Save smoser/4c5480b3c98c37a4eff0715e51f2c27b to your computer and use it in GitHub Desktop.
Save smoser/4c5480b3c98c37a4eff0715e51f2c27b to your computer and use it in GitHub Desktop.
use usd-import to import a bunch of packages

Import Lots of packages

This is just a simple wrapper and doc on how to import a lot of packages.

Setup

This assumes you have usd-import in your path.

# Fetch the importer over https (the code is executed below, so do not
# pull it over an unauthenticated transport) and put it on PATH.
git clone https://git.launchpad.net/usd-importer
PATH=$PWD/usd-importer:$PATH

## from README.md install needed deps
# apt-get is used throughout: the 'apt' front-end does not promise a
# stable command line interface for scripted use.
sudo apt-get update -qy
deps="dpkg-dev python3-debian python3-launchpadlib python3-pygit2 python3-ubuntutools python3-pkg-resources"
# $deps is intentionally unquoted so it word-splits into package names.
sudo apt-get install -qy $deps </dev/null

## tools for usage here.
sudo apt-get install -qy parallel

Get a list of packages in the archive

This creates \<release>-\<component>.list files (yakkety-main.list) that just have a list of source packages in that release and component.

# Download Sources.xz for each component of $rel and reduce it to a plain
# list of source package names in <release>-<component>.list.
rel=yakkety
mirror="http://archive.ubuntu.com/ubuntu/"
for comp in main universe multiverse; do
    srcxz="$rel-$comp-sources.xz"
    echo "$rel-$comp"
    # --fail makes curl exit non-zero on an HTTP error instead of saving
    # the server's error page as if it were Sources.xz.
    if curl --silent --fail "$mirror/dists/$rel/$comp/source/Sources.xz" > "$srcxz.tmp" &&
        mv "$srcxz.tmp" "$srcxz"; then
        xzcat < "$srcxz" | awk '$1 == "Package:" {print $2}' > "$rel-$comp.list"
    else
        # Do not leave a partial download behind, and say what went wrong.
        rm -f "$srcxz.tmp"
        echo "failed to download Sources.xz for $rel-$comp" 1>&2
    fi
done

Now run do-import-pkg using GNU parallel

Since the importer can be very network-heavy, run several imports in parallel. Tweak --jobs to your liking.

# Start from an empty work directory; each package is imported beneath it.
rm -Rf work.d; mkdir work.d
f=../yakkety-main.list
# Only start the import if we really are inside work.d; otherwise the
# relative paths (../do-import-pkg, $f) and all output would land in the
# wrong place.
cd work.d &&
    time parallel --no-notice --ungroup --jobs=16 ../do-import-pkg \
        < "$f" 2>&1 | tee "run-${f##*/}.log"

Each package gets its own directory in current dir.

  • pkg.out: log of usd-import output. first line is the command that was run.

  • pkg.log: just timestamped log of start and stop.

  • pkg.result: a list of 'key: value' with information on that package.

    $ cat advancecomp.result
    took: 720
    took_human: 12m 0s
    downloaded: 5264K
    gitsize: 1568K
    

After finishing, do-import-pkg will clean the output directory to put all files other than git and gitwd into a download directory.

#!/bin/bash
# PID of the backgrounded importer process; empty while none is running.
KID=""
# Launchpad owner passed to usd-import through --lp-owner.
LP_OWNER="usd-import-team"

# Print all arguments on stderr.
error() {
    echo "$@" >&2
}

# Print all arguments on stderr, then exit the script with status 1.
fail() {
    error "$@"
    exit 1
}

# Append a timestamped, PID-tagged entry to the file named by $LOG and
# repeat the bare message on stderr.
log() {
    local stamp
    stamp="$(date -R)"
    echo "${stamp}:[$$]" "$@" >> "$LOG"
    error "$@"
}
# EXIT-trap handler: terminate the background child recorded in $KID, if
# one is still around.
# Globals: KID (read) - PID of the child, or empty when none is running.
# 'kill -0' is used as the liveness probe instead of testing for
# /proc/$KID, so the check does not depend on a Linux-style /proc, and the
# obsolescent '[ ... -a ... ]' form is avoided.
cleanup() {
    if [ -n "$KID" ] && kill -0 "$KID" 2>/dev/null; then
        kill "$KID"
    fi
}
# Convert a duration in seconds into a human readable string.
# Arguments:
#   $1 - duration in whole seconds (decimal integer).
#   $2 - output format, default "full":
#          full   - every unit spelled out
#                   ("0 years 0 days 0 hours 12 minutes 0 seconds")
#          tfull  - spelled out, leading zero-valued units dropped
#                   ("12 minutes 0 seconds")
#          short  - every unit abbreviated ("0y 0d 0h 12m 0s")
#          tshort - abbreviated, leading zero-valued units dropped
#                   ("12m 0s")
# Outputs: the result is stored in the global _RET; diagnostics go to
#          stderr so they can never be mistaken for data.
# Returns: 0 on success, 1 on an unknown format or a non-integer duration
#          (_RET is left untouched in that case).
# A year is counted as 365 days.
sec2human() {
    local delta="$1" fmt="${2:-full}" year day hour min sec rem=0
    local sign=""

    # Validate up front, before doing any work.
    case "$fmt" in
        full|tfull|short|tshort) : ;;
        *) echo "bad format '$fmt'" 1>&2; return 1;;
    esac
    # Without this check a non-numeric value would be evaluated as a shell
    # variable name inside $(( )) and silently become 0.
    if ! [[ "$delta" =~ ^-?[0-9]+$ ]]; then
        echo "bad duration '$delta'" 1>&2
        return 1
    fi
    [ "${delta#-}" = "$delta" ] || sign="-"
    # Force base 10 so zero-padded input ("08") is not parsed as octal.
    rem=$(( ${sign}10#${delta#-} ))

    year=$((rem / (365*24*60*60)))
    rem=$((rem - year*365*24*60*60))
    day=$((rem / (24*60*60)))
    rem=$((rem - day*24*60*60))
    hour=$((rem / (60*60)))
    rem=$((rem - hour*60*60))
    min=$((rem / 60))
    rem=$((rem - min*60))
    sec=$rem

    local t="" unit short long
    local full="" tfull="" brev="" tbrev=""
    for t in "$year y year" "$day d day" "$hour h hour" \
        "$min m minute" "$sec s second"; do
        read -r unit short long <<< "$t"
        # pluralise everything except exactly 1
        [ "$unit" = "1" ] || long="${long}s"
        full="$full $unit $long"
        brev="$brev $unit$short"
        # the trimmed variants start at the first non-zero unit
        if [ "$unit" != "0" ] || [ -n "$tfull" ]; then
            tfull="$tfull $unit $long"
            tbrev="$tbrev $unit$short"
        fi
    done
    tfull=${tfull# }
    full=${full# }
    tbrev=${tbrev# }
    brev=${brev# }
    # a zero duration trims down to nothing; report it explicitly
    [ -n "$tbrev" ] || tbrev="0s"
    [ -n "$tfull" ] || tfull="0 seconds"

    case "$fmt" in
        full) _RET="$full";;     # full words with leading zero units
        tfull) _RET="$tfull";;   # full words, no leading zero units
        short) _RET="$brev";;    # abbreviated with leading zero units
        tshort) _RET="$tbrev";;  # abbreviated, no leading zero units
    esac
    return 0
}
# Count the .dsc files and total up the size of the downloaded files in a
# package directory.
# Arguments:
#   $1 - directory to inspect.
#   $2 - file name prefix to consider; defaults to "<$1>_".
# Outputs (globals):
#   _RET_num  - number of regular files named <prefix>*.dsc
#   _RET_size - combined size of all regular files named <prefix>*,
#               in KiB, rounded down
# Returns: 0 on success (a missing directory counts as 0 files / 0 KiB),
#          1 if the directory or one of its files cannot be read.
#
# The files are matched with a shell glob on the literal prefix rather
# than by running awk regexes over 'ls -l' output: package names such as
# "g++" or "gtk+2.0" contain regex metacharacters and never matched, the
# count came back empty instead of 0, and large totals could be printed by
# awk in scientific notation and then truncated.
dl_info() {
    local d="$1" prefix="${2:-${1}_}" f="" fsize="" num=0 bytes=0
    _RET_num=0
    _RET_size=0
    [ -d "$d" ] || return 0
    if [ ! -r "$d" ] || [ ! -x "$d" ]; then
        error "failed ls in $d"
        return 1
    fi
    for f in "$d"/"$prefix"*; do
        # also skips the literal pattern left behind by an unmatched glob
        [ -f "$f" ] || continue
        case "$f" in
            *.dsc) num=$((num + 1));;
        esac
        fsize=$(wc -c < "$f") ||
            { error "failed to get size of $f"; return 1; }
        bytes=$((bytes + fsize))
    done
    _RET_num=$num
    _RET_size=$((bytes / 1024))
    return 0
}
# ---- main: import one source package and record timing/size statistics ----
# Usage: do-import-pkg <source-package>
# Files written in the current directory:
#   <pkg>.log    - timestamped start/finish entries (written by log)
#   <pkg>.out    - the command line, usd-import's stdout+stderr, exit line
#   <pkg>.result - 'key: value' summary of the run
#   <pkg>/       - the directory handed to usd-import via --directory
# Exit status: the exit status of usd-import.
trap cleanup EXIT
pkg="$1"
# The name is used verbatim in 'rm -Rf' and as a file name prefix below,
# so refuse anything that is empty or could point outside the current
# directory.
case "$pkg" in
    ""|.|..|*/*) fail "usage: ${0##*/} <source-package> (got '$pkg')";;
esac
LOG="$pkg.log"
output="$pkg.out"
stime=$SECONDS
rm -Rf "$pkg" "$output" "$LOG"
sdate="$(date -R)"
# Baseline for the download accounting.
dl_info "$pkg" || fail "failed getting dl_info for $pkg"
start_dsc="${_RET_num}"
start_size="${_RET_size}"
cmd=(
    usd-import --verbose "--lp-owner=$LP_OWNER"
    "--directory=$pkg" --no-push "$pkg" )
log "starting $pkg"
echo "$sdate: ${cmd[*]}" > "$output"
# Run in the background and remember the PID so the EXIT trap (cleanup)
# can kill the importer if this script goes away first.
"${cmd[@]}" >>"$output" 2>&1 &
KID=$!
wait "$KID"
ret=$?
KID=""
etime=$SECONDS
edate=$(date -R)
echo "$edate: exited $ret" >> "$output"
delta=$((etime-stime))
sec2human "$delta" tshort
delta_human="$_RET"
# Size of the git tree in 1K blocks; "FAIL" if du could not measure it.
if out=$(du -sk "$pkg/git"); then
    git_size=${out%%[[:space:]]*}
else
    git_size="FAIL"
fi
dl_info "$pkg" || fail "failed getting dl_info for $pkg"
end_dsc="${_RET_num}"
end_size="${_RET_size}"
# Default empty values to 0 so the arithmetic cannot hit a syntax error.
dsc_count=$((${end_dsc:-0}-${start_dsc:-0}))
dl_size=$((${end_size:-0}-${start_size:-0}))
log "finished $pkg. returned $ret. took ${delta}s (${delta_human})." \
    "downloaded: ${dl_size}K git_size: ${git_size}K dsc_count: ${dsc_count}"
{
    echo "date_end: ${edate}"
    echo "date_start: ${sdate}"
    echo "downloaded: ${dl_size}K"
    echo "dsc_count: ${dsc_count}"
    echo "gitsize: ${git_size}K"
    echo "result: $ret"
    echo "took: $delta"
    echo "took_human: $delta_human"
} > "$pkg.result"
exit "$ret"
#!/bin/bash
# Collect the regular files in directory $1 whose names match the glob
# pattern $2. The matches are stored in the global array _RET, which is
# emptied first. Always returns 0.
files_in_dir() {
    local dir="$1" candidate=""
    _RET=( )
    # $2 is deliberately left unquoted so the shell expands it as a glob.
    for candidate in "$dir"/$2; do
        if [ -f "$candidate" ]; then
            _RET+=( "$candidate" )
        fi
    done
    return 0
}
# Write all arguments to stderr.
error() {
    echo "$@" >&2
}

# Exit the script with status 1; when arguments are given, print them on
# stderr first.
fail() {
    if [ $# -ne 0 ]; then
        error "$@"
    fi
    exit 1
}
# For each .out file given, print one line "<name>: <reason>" on stdout,
# where <name> is the file's base name without ".out" and <reason> is
# derived from the third line from the end of the file.
# Lines this recognises look like:
#   OSError: [Errno 101] Network is unreachable
#   ubuntutools.archive.DownloadError: File casper_1.213.tar.gz could not be found
#   ubuntutools.archive.DownloadError: https://launchpad.net/ubuntu/+archive/primary/+files/cdrkit_1.1.6-1ubuntu3.dsc: 503 Service Temporarily Unavailable
# Anything else is reported as "Unknown: <line>".
why_fail() {
    local outfile="" line="" missing="" reason="" name=""
    for outfile in "$@"; do
        # the third line from the end
        line=$(tail -n 3 "$outfile" | head -n 1)
        if [[ "$line" == OSError:*"Errno 101"* ]]; then
            reason="OSError 101 [Network unreachable]"
        elif [[ "$line" == *"DownloadError: File "* ]]; then
            # the word right after "File " is the missing file's name
            missing=${line#*File }
            missing=${missing%% *}
            reason="DownloadError missing file [$missing]"
        elif [[ "$line" == *DownloadError:*"503 "* ]]; then
            reason="DownloadError 503 Unavailable"
        else
            reason="Unknown: $line"
        fi
        name="${outfile##*/}"
        name=${name%.out}
        printf "%-40s: %s\n" "$name" "$reason"
    done
}
# ---- main: summarise import runs ----
# Usage: <script> [DIR | FILE.out ...]
#   one directory argument - inspect every *.out file in that directory
#   no arguments           - inspect every *.out file in the current directory
#   anything else          - treat the arguments as the .out files to inspect
# Two separate tests joined by && replace '[ ... -a ... ]', whose parsing
# is ambiguous when "$1" happens to look like a test operator.
if [ $# -eq 1 ] && [ -d "$1" ]; then
    files_in_dir "$1" "*.out"
    files=( "${_RET[@]}" )
    [ -f "${files[0]}" ] || fail "$1 had no .out files in it"
elif [ $# -eq 0 ]; then
    files_in_dir "$PWD" "*.out"
    files=( "${_RET[@]}" )
    [ -f "${files[0]}" ] || fail "cwd ($PWD) had no .out files in it"
else
    files=( "$@" )
fi
# Classification of the .out files:
#   working - no matching .result file exists
#   pass    - .result records "result: 0"
#   fail    - .result records any other result
working=( )
pass=( )
fail=( )
for i in "${files[@]}"; do
    rfile="${i%.out}.result"
    if [ ! -e "$rfile" ]; then
        working+=( "$i" )
        continue
    fi
    # An awk failure and a missing "result:" line are both fatal.
    if ! result=$(awk '$1 == "result:" { print $2 }' "$rfile") ||
        [ -z "$result" ]; then
        fail "failed to get result from $rfile"
    fi
    case "$result" in
        0) pass+=( "$i" );;
        *) fail+=( "$i" )
           why_fail "$i";;
    esac
done
total=$((${#working[@]}+${#pass[@]}+${#fail[@]}))
echo "$total total. ${#working[@]} working. ${#pass[@]} pass. ${#fail[@]} fail."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment