Created
April 8, 2015 22:40
-
-
Save sleep-walker/18bb7f6e84987975848a to your computer and use it in GitHub Desktop.
license changes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Rebuilds README.md and licenses_changes.txt in the current directory from
# a published spreadsheet ($DOCS) and the SPDX license list page ($LICENSES).

####
# Configuration
#####

# Published spreadsheet, requested as plain text (output=txt).
readonly DOCS="https://docs.google.com/spreadsheet/pub?hl=en_US&hl=en_US&key=0AqPp4y2wyQsbdGQ1V3pRRDg5NEpGVWpubzdRZ0tjUWc&single=true&gid=0&output=txt"

# HTML page carrying the SPDX license table.
readonly LICENSES="http://spdx.org/licenses/"

# xmllint shell commands used to pull columns out of the license page.
# The upper table contains <td align="center"> and the other one does not,
# so that attribute is used to tell the two tables apart.
identifiers_xpath='cat //tr/td[@align="center"]/../td/code/text()'
fullnames_xpath='cat //tr/td[@align="center"]/../td/a[@rel]/text()'
# Print all arguments as a single line on stderr.
# Arguments: any number of words; they are joined with the first character
#            of IFS (a space by default).
# Outputs:   the joined message plus a newline on file descriptor 2.
err() {
  # >&2 duplicates the already-open descriptor instead of re-opening
  # /dev/stderr by path, and printf never treats "-n"/"-e" as options.
  printf '%s\n' "$*" >&2
}
# Write the document found at URL $1 to stdout.
#
# DEBUG aid: when $1 equals $DOCS or $LICENSES and a regular, readable file
# named DOCS or LICENSES (respectively) exists in the current directory,
# that file is printed instead and nothing is downloaded. Without such a
# file the URL is fetched normally.
#
# Globals:   DOCS, LICENSES (read)
# Arguments: $1 - URL to fetch
# Outputs:   document body on stdout; curl's progress bar and the error
#            message on stderr
# Exits:     10 when curl fails. `exit` only ends the shell the function
#            runs in, so callers using it inside a pipeline or $(...) must
#            check the resulting status themselves.
fetch_or_die() {
  local url=$1
  local fixture=

  case "$url" in
    "$DOCS")
      fixture=DOCS
      ;;
    "$LICENSES")
      fixture=LICENSES
      ;;
  esac

  if [[ -n "$fixture" && -f "$fixture" && -r "$fixture" ]]; then
    cat -- "$fixture"
    return
  fi

  # --fail:     an HTTP error status becomes a non-zero exit instead of the
  #             server's error page being passed on as the document.
  # --location: follow redirects rather than emitting the redirect body.
  if ! curl -# --fail --location "$url" -o -; then
    err "Couldn't fetch '$url'"
    exit 10
  fi
}
# Run an xmllint shell command against an HTML file.
# Arguments: $1 - path to the HTML file
#            $2 - xmllint shell command, e.g. 'cat //td/code/text()'
# Outputs:   xmllint's stdout with every line starting with ' -------'
#            (result separators) or '/ >' (shell prompts) removed.
# Returns:   status of the final grep in the pipeline.
xpath() {
  local html_file=$1
  local shell_cmd=$2

  xmllint --html --shell "$html_file" <<< "$shell_cmd" \
    | grep -vE '^( -------|/ >)'
}
# Use the C locale for every tool started from here on.
export LC_ALL=C

#####
# Step 1 - get the spreadsheet with license changes
##

# Download the spreadsheet, drop every line containing 'New format' and
# strip trailing blanks.
fetch_or_die "$DOCS" \
  | sed '/New format/d; s@[[:blank:]]*$@@' > licenses_changes.ntxt
# fetch_or_die runs in a pipeline subshell here, so its own `exit` cannot
# stop the script; propagate a failed download by hand.
fetch_status=${PIPESTATUS[0]}
if (( fetch_status != 0 )); then
  rm -f -- licenses_changes.ntxt
  exit "$fetch_status"
fi

# For every identifier carrying the 'SUSE-' prefix emit a line of the form
#   license+ <TAB> license+
# NOTE(review): the purpose of these '+' self-mappings is not evident from
# this script - confirm they are still needed.
sed -n 's@^\(SUSE-[^[:blank:]]\+\)\t.*@\1+\t\1+@p' licenses_changes.ntxt \
  > licenses_changes.ptxt

#return 2> /dev/null || exit
#####
# Step 2 - download spdx license table
##

# Temporary file for the HTML page. The EXIT trap removes it on every early
# exit, including the `exit 10` raised inside fetch_or_die.
if ! license_tmp=$(mktemp); then
  err "Couldn't create temporary file"
  exit 4
fi
trap 'rm -f -- "$license_tmp"' EXIT

# Fetch the HTML page with the license table.
fetch_or_die "$LICENSES" > "$license_tmp" || exit 10

# Parse identifiers and full names out of the HTML, one array entry per line.
readarray -t identifiers < <(xpath "$license_tmp" "$identifiers_xpath")
readarray -t fullnames < <(xpath "$license_tmp" "$fullnames_xpath")

# Both arrays are filled, so the page is no longer needed - clean up.
rm -f -- "$license_tmp"
trap - EXIT

# Sanity checks: both columns must have the same, non-zero length.
if (( ${#identifiers[@]} != ${#fullnames[@]} )); then
  err "Number of identifiers and fullnames read from '$LICENSES' doesn't match."
  err "identifiers read: ${#identifiers[@]}"
  err "fullnames read: ${#fullnames[@]}"
  exit 2
elif (( ${#identifiers[@]} == 0 )); then
  err "No license read"
  exit 3
fi
# Disabled: identity mappings for every SPDX identifier.
# NOTE(review): the reason these were ever generated is unclear - confirm
# before re-enabling.
#for i in "${identifiers[@]}"; do
# echo "$i $i" >> licenses_changes.ntxt
# echo "$i+ $i+" >> licenses_changes.ptxt
#done

# Duplicate check on the license short strings: take column 1 of the
# spreadsheet plus the SPDX identifiers, collapse exact repeats, strip the
# 'SUSE-' prefix and report every name that is still present more than once
# (i.e. a 'SUSE-X' entry that clashes with a plain 'X').
dups=$(
  {
    cut -d$'\t' -f1 licenses_changes.ntxt
    printf '%s\n' "${identifiers[@]}"
  } | sort -u | sed 's@^SUSE-@@' | sort | uniq -d
)

# Duplicate check on column 2 of the spreadsheet.
# NOTE(review): this was labelled the "description" check, yet it mixes in
# the identifiers rather than the full names - confirm which is intended.
column2_dups=$(
  {
    cut -d$'\t' -f2 licenses_changes.ntxt
    printf '%s\n' "${identifiers[@]}"
  } | sort | uniq -d
)

# Keep the two lists on separate lines so that the last entry of the first
# list is not glued to the first entry of the second one.
if [[ -n "$dups" && -n "$column2_dups" ]]; then
  dups+=$'\n'
fi
dups+=$column2_dups

# Any duplicate is fatal.
if [[ -n "$dups" ]]; then
  printf 'DUPS %s\n' "$dups"
  exit 1
fi
# README.md: fixed introduction, one table row per SPDX license, then the
# sorted, de-duplicated list of 'SUSE-' tags found in the spreadsheet.
{
  # Quoted delimiters: the text is emitted literally, nothing is expanded.
  cat <<'EOF'
This is the git for openSUSE:Tools/obs-service-format_spec_file
It happens to be *the* repository for valid licenses to be used in openSUSE spec files
# [SPDX Licenses](http://spdx.org/licenses)
License Tag | Description
----------- | -----------
EOF
  # identifiers and fullnames are parallel arrays, so walk them by index.
  for i in "${!identifiers[@]}"; do
    printf '%s | %s\n' "${identifiers[i]}" "${fullnames[i]}"
  done
  cat <<'EOF'
# SUSE Additions
|License Tag|
|-----------|
EOF
  sed -n 's@^\(SUSE-[^[:blank:]]*\)\t.*@|\1|@p' licenses_changes.ntxt | sort -u
} > README.md

# licenses_changes.txt: a fixed header line followed by the sorted union of
# both intermediate files.
# NOTE(review): whether the '+' entries from licenses_changes.ptxt add
# anything useful is unclear - confirm.
{
  printf '%s\n' "First line"
  sort -u licenses_changes.ntxt licenses_changes.ptxt
} > licenses_changes.txt

# Drop the intermediate files.
rm -- licenses_changes.{p,n}txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment