Skip to content

Instantly share code, notes, and snippets.

@sleep-walker
Created April 8, 2015 22:40
Show Gist options
  • Save sleep-walker/18bb7f6e84987975848a to your computer and use it in GitHub Desktop.
Save sleep-walker/18bb7f6e84987975848a to your computer and use it in GitHub Desktop.
license changes
#!/bin/bash
####
# Configuration
#####
# URL of the published Google spreadsheet, requested as plain text
# (output=txt). It is downloaded in step 1 below.
DOCS="https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0AqPp4y2wyQsbdGQ1V3pRRDg5NEpGVWpubzdRZ0tjUWc&single=true&gid=0&output=txt"
# URL of the HTML page holding the SPDX license table; downloaded in step 2.
LICENSES="http://spdx.org/licenses/"
# Commands fed to the xmllint shell ("cat <XPath>") to extract data from the
# license page. Only the upper table has rows with a <td align="center"> cell,
# so that cell is used to select rows of the upper table and skip the other.
# - identifiers: text of the <code> element inside a cell of such a row
# - full names:  text of the <a rel=...> link inside a cell of such a row
identifiers_xpath='cat //tr/td[@align="center"]/../td/code/text()'
fullnames_xpath='cat //tr/td[@align="center"]/../td/a[@rel]/text()'
# Print a diagnostic on stderr.
# Arguments: the message words; they are joined by single spaces and written
#            as one line.
# printf is used instead of echo so that a message starting with -n, -e or -E
# is printed literally, and fd 2 is duplicated (>&2) instead of re-opening
# /dev/stderr by path.
err() {
  local IFS=' ' # "$*" joins on the first IFS character; force a space
  printf '%s\n' "$*" >&2
}
# Write the content of a URL to stdout.
# Globals:   DOCS, LICENSES (read)
# Arguments: $1 - URL to download
# Outputs:   the document on stdout; curl's progress bar and any error
#            message on stderr
# Exits:     terminates the calling shell with status 10 when the download
#            fails (only the subshell when used as a pipeline stage)
#
# DEBUG aid: when $1 is the spreadsheet or the license-page URL and a readable
# file named DOCS or LICENSES (respectively) exists in the current directory,
# that file is printed instead of touching the network. Without such a file
# the URL is downloaded normally.
fetch_or_die() {
  local url=$1
  local fixture=

  case "$url" in
    "$DOCS") fixture=DOCS ;;
    "$LICENSES") fixture=LICENSES ;;
  esac
  if [[ -n "$fixture" && -f "$fixture" && -r "$fixture" ]]; then
    cat -- "$fixture"
    return
  fi

  # --fail turns HTTP errors (404, 500, ...) into a non-zero exit status
  # instead of passing the server's error page on as if it were the document;
  # --location follows redirects.
  if ! curl -# --fail --location "$url" -o -; then
    err "Couldn't fetch '$url'"
    exit 10
  fi
}
# Run one xmllint shell command against an HTML file and print its output
# without the shell's separator lines and prompt lines.
# Arguments: $1 - path of the HTML file
#            $2 - command for the xmllint shell (e.g. "cat <XPath>")
# Outputs:   the remaining lines on stdout
xpath() {
  local html_file=$1
  local shell_command=$2

  printf '%s\n' "$shell_command" \
    | xmllint --html --shell "$html_file" \
    | grep -vE '^( -------|/ >)'
}
# Run every tool below in the C locale.
export LC_ALL=C
#####
# Step 1 - get the spreadsheet with the license changes
##
# Download the spreadsheet, drop every line containing 'New format' and strip
# trailing blanks.
fetch_or_die "$DOCS" \
  | sed '/New format/d; s@[[:blank:]]*$@@' > licenses_changes.ntxt
# fetch_or_die is a pipeline stage here, so it runs in a subshell and its
# `exit` cannot stop this script. Check its status explicitly instead of
# carrying on with an empty or truncated table.
fetch_status=${PIPESTATUS[0]}
if [ "$fetch_status" -ne 0 ]; then
  err "Couldn't get the license spreadsheet"
  rm -f licenses_changes.ntxt
  exit "$fetch_status"
fi
# For every line whose first field is a 'SUSE-' prefixed tag followed by a
# tab, write "<tag>+<TAB><tag>+" to the second intermediate table.
# NOTE(review): presumably the "+" marks an "or later" variant of the tag -
# confirm with the consumer of licenses_changes.txt.
sed -n 's@^\(SUSE-[^[:blank:]]\+\)\t.*@\1+\t\1+@p' \
  licenses_changes.ntxt > licenses_changes.ptxt
# Debug aid (disabled): stop here, whether the script is sourced or executed.
#return 2> /dev/null || exit
#####
# Step 2 - download spdx license table
##
# Temporary file for the downloaded HTML page (xmllint is given a file path).
if ! license_tmp=$(mktemp); then
  err "Couldn't create a temporary file"
  exit 4
fi
# Fetch the page. fetch_or_die exits on failure; running it in a subshell
# lets this script remove the temporary file before it stops.
( fetch_or_die "$LICENSES" ) > "$license_tmp"
fetch_status=$?
if [ "$fetch_status" -ne 0 ]; then
  rm -f -- "$license_tmp"
  exit "$fetch_status"
fi
# Parse identifiers and full names from the HTML, one array element per line.
readarray -t identifiers < <(xpath "$license_tmp" "$identifiers_xpath")
readarray -t fullnames < <(xpath "$license_tmp" "$fullnames_xpath")
# The page is not needed any more; remove it before the sanity checks so that
# no exit path below leaves it behind.
rm -f -- "$license_tmp"
# Sanity checks: the two lists are paired by index, so they must have the
# same length, and there must be at least one license.
if (( ${#identifiers[@]} != ${#fullnames[@]} )); then
  err "Number of identifiers and fullnames read from '$LICENSES' doesn't match."
  err "identifiers read: ${#identifiers[@]}"
  err "fullnames read: ${#fullnames[@]}"
  exit 2
elif (( ${#identifiers[@]} == 0 )); then
  err "No license read"
  exit 3
fi
# Disabled code: it would append one line per SPDX identifier (the identifier
# twice) to each intermediate table. Its purpose is unclear - TODO confirm
# before re-enabling.
#for i in "${identifiers[@]}"; do
# echo "$i $i" >> licenses_changes.ntxt
# echo "$i+ $i+" >> licenses_changes.ptxt
#done
# Check for duplicates.
# 1) License short strings: first spreadsheet column plus the SPDX
#    identifiers. Exact repeats are collapsed first (sort -u), then the
#    'SUSE-' prefix is removed, so what is reported are tags that collide
#    only once the prefix is gone (e.g. SUSE-Foo versus Foo).
tag_dups=$(
  {
    cut -d$'\t' -f1 licenses_changes.ntxt
    printf '%s\n' "${identifiers[@]}"
  } | sort -u | sed 's@^SUSE-@@' | sort | uniq -d
)
# 2) Descriptions: second spreadsheet column plus the SPDX identifiers; any
#    value occurring more than once is reported.
# NOTE(review): this list is built from the identifiers, not from the full
# names - confirm that is the intended comparison.
desc_dups=$(
  {
    cut -d$'\t' -f2 licenses_changes.ntxt
    printf '%s\n' "${identifiers[@]}"
  } | sort | uniq -d
)
# Join both lists. The newline keeps the last entry of the first list from
# being glued to the first entry of the second one.
dups=$tag_dups
if [ -n "$tag_dups" ] && [ -n "$desc_dups" ]; then
  dups+=$'\n'
fi
dups+=$desc_dups
# Any duplicate is fatal.
if [ -n "$dups" ]; then
  echo "DUPS $dups"
  exit 1
fi
# Generate README.md: a fixed introduction, one "tag | full name" row per
# parsed SPDX license, and a table of the distinct 'SUSE-' prefixed tags found
# in the first column of the spreadsheet.
{
  printf '%s\n' \
    'This is the git for openSUSE:Tools/obs-service-format_spec_file' \
    'It happens to be *the* repository for valid licenses to be used in openSUSE spec files' \
    '# [SPDX Licenses](http://spdx.org/licenses)' \
    'License Tag | Description' \
    '----------- | -----------'
  # identifiers and fullnames are paired by index
  for idx in "${!identifiers[@]}"; do
    printf '%s | %s\n' "${identifiers[idx]}" "${fullnames[idx]}"
  done
  printf '%s\n' \
    '# SUSE Additions' \
    '|License Tag|' \
    '|-----------|'
  sed -n 's@^\(SUSE-[^[:blank:]]*\)\t.*@|\1|@p' licenses_changes.ntxt \
    | sort -u
} > README.md
# Assemble licenses_changes.txt: the literal header "First line" followed by
# the sorted, de-duplicated union of both intermediate tables.
printf '%s\n' 'First line' > licenses_changes.txt
cat licenses_changes.ntxt licenses_changes.ptxt \
  | sort -u >> licenses_changes.txt
# The intermediate tables are no longer needed.
rm licenses_changes.ptxt licenses_changes.ntxt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment