Last active
February 26, 2018 23:32
-
-
Save bathtime/0d4954a5c6275539bbe00da5ce2446df to your computer and use it in GitHub Desktop.
Lewis and Short Elementary Latin Dictionary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# This program requires an XML dictionary file to run. If it is not on your machine,
# it will be downloaded automatically and stored in ~/.config/latin/.
# Name this file 'latin' and make it executable:
#
# $ chmod +x latin
#
# To run:
# $ ./latin amo
#
# To enable internet auto-decline (the word is first resolved to its dictionary
# headword(s) via the Perseus website, and those are then looked up):
# $ ./latin -d amo
#
# To run with only auto-decline (print the Perseus analysis and exit, without
# the dictionary lookup):
# $ ./latin -c amo
#
# Where 'amo' is the term searched.
# ---------------------------------------------------------------------------
# Command-line handling and optional Perseus morphology lookup.
#
#   latin TERM      -> searchTerms is TERM itself
#   latin -d TERM   -> searchTerms is the headword(s) Perseus reports for TERM
#   latin -c TERM   -> print the Perseus analysis of TERM and exit
#
# Sets: key, URL, searchTerms (read by the dictionary lookup further down).
# ---------------------------------------------------------------------------

# Word to analyse when a flag is given; empty for a plain lookup.
key=$2
URL="http://www.perseus.tufts.edu/hopper/morph?l=$key&la=la"

# HTML fragments that delimit the parts of the Perseus page being scraped.
wFIN='<h4 class="la">'
wFOUT='</h4>'
wDefIn='<span class="lemma_definition">'
wDefOut='</span>'
wFormIn='<td class="la">'"$key"'</td>'
wFormOut='<td style="font-size: x-small">'

case "$1" in
  -d)
    ## Connect to Perseus to obtain the headword (needed as key for the xml file).
    # The scraped text is passed to printf as an argument, never as the
    # format string, so a "%" in the page cannot corrupt the output.
    searchTerms=$(wget -q -O- "$URL" | mawk -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" '
      $0 ~ vWFIN, $0 ~ vWFOUT {
        printf "%s\n", substr($0, 18, length($0) - 22)
        next
      }')
    ;;
  -c)
    # Print the Perseus analysis only: headword in brackets, then its
    # definition lines, then one "-" line per parsed form.
    wget -q -O- "$URL" | mawk \
      -v vDefIn="$wDefIn" -v vDefOut="$wDefOut" \
      -v vFormIn="$wFormIn" -v vFormOut="$wFormOut" \
      -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" '
      # Headword heading
      $0 ~ vWFIN, $0 ~ vWFOUT {
        printf "\n[ %s ]", substr($0, 18, length($0) - 22)
        next
      }
      # Definition span: keep only the lines that contain no ">"
      $0 ~ vDefIn, $0 ~ vDefOut {
        if (!/>/) {
          $1 = $1            # rebuild $0 so runs of whitespace collapse
          print " " $0
        }
      }
      # Table cells between the form cell and the small-print cell
      $0 ~ vFormIn, $0 ~ vFormOut {
        if (!/td /) {
          $1 = $1
          $0 = substr($0, 5, length($0) - 9)   # drop the wrapping tag text
          print "-" $0
          next
        }
      }'
    # Nothing more to do in this mode; the dictionary is not consulted.
    exit
    ;;
  *)
    searchTerms=$1
    ;;
esac
# ---------------------------------------------------------------------------
# Make sure the XML dictionary is available locally, downloading it on first
# use. Sets: XMLfile, XMLdir, XMLlink (XMLdir and XMLfile are read by the
# lookup loop further down).
# ---------------------------------------------------------------------------
XMLfile=Perseus_text_1999.04.0060.xml
XMLdir=~/.config/latin/
XMLlink="http://www.perseus.tufts.edu/hopper/dltext?doc=Perseus:text:1999.04.0060"

# -s rather than -e: an empty file left behind by an earlier failed download
# must not be mistaken for a usable dictionary.
if [[ ! -s "$XMLdir$XMLfile" ]]; then
  # printf, because bash's echo prints "\n" literally. Sent to stderr so the
  # notice does not mix with the dictionary output.
  printf '\nFile: %s not found.\n\nDownloading from %s ...\n\n' \
    "$XMLdir$XMLfile" "$XMLlink" >&2

  mkdir -p "$XMLdir" || exit 1

  # Download into a temporary file and only install it once the transfer
  # succeeded, so an interrupted download never becomes the dictionary.
  XMLtmp=$(mktemp "${XMLdir}${XMLfile}.XXXXXX") || exit 1
  if wget -qO- "$XMLlink" > "$XMLtmp" && [[ -s "$XMLtmp" ]] \
    && tr -d '\r' < "$XMLtmp" > "$XMLtmp.lf" \
    && mv -f -- "$XMLtmp.lf" "$XMLdir$XMLfile"; then
    # tr above trims DOS CRs to make the file Linux compatible.
    rm -f -- "$XMLtmp"
  else
    rm -f -- "$XMLtmp" "$XMLtmp.lf"
    printf 'Error: could not download %s\n' "$XMLlink" >&2
    exit 1
  fi
fi
# ---------------------------------------------------------------------------
# Look each search term up in the XML dictionary and print its entry as
# formatted plain text.
# Reads: searchTerms, XMLdir, XMLfile (set earlier in the script).
# ---------------------------------------------------------------------------

# Markers and separators for the formatter. None of them depends on the search
# term, so they are set once rather than on every iteration. They are handed
# to awk with -v, which turns the "\n" sequences into real newlines.
keyOut='</entry>'      # Line that ends the entry being collected
tagIn='<'              # How are tags to be distinguished?
tagOut='>'             #
defTagIn='<sense'      # Opening of a definition tag
defTagOut='>'
# NOTE(review): this puts a literal "a" in place of <orth>; possibly a
# formatting character was lost here - confirm the intended value.
keySepA='a'            # Separates the main word from its roots
keySepB=','            #
etySepA='['            # Etymology left
etySepB=']\n\n • '     # Etymology right
defSep='\n\n '         # Separates individual definitions
emSep='\n\n • '        # Separates em-dashes

# searchTerms is deliberately unquoted: it may hold several whitespace-
# separated headwords and each one is looked up in turn.
for key in $searchTerms; do
  keyIn="key=\"${key}\""   # Which tag shall be searched?

  # First concatenate the result into a usable string else text is difficult
  # to manipulate; all formatting then happens on that one string in END.
  awk -v vkeyIn="$keyIn" -v vkeyOut="$keyOut" \
    -v vdefTagIn="$defTagIn" -v vdefTagOut="$defTagOut" \
    -v tagIn="$tagIn" -v tagOut="$tagOut" \
    -v vkeySepA="$keySepA" -v vkeySepB="$keySepB" \
    -v vdefSep="$defSep" \
    -v vetySepA="$etySepA" -v vetySepB="$etySepB" \
    -v vemSep="$emSep" '
    # Collect every line from the key match up to the closing entry tag
    $0 ~ vkeyIn, $0 ~ vkeyOut { WRK = WRK $0; next }

    END {
      $0 = WRK

      # Separation after main key word
      sub(/<orth>/, vkeySepA)
      sub(/<\/orth>/, vkeySepB)

      # Add missing dot after gender
      gsub(/<\/gen>/, ". ")

      # Add separation for several variations of definitions
      # NOTE(review): ".*" is greedy and the entry is a single string here, so
      # this can span more than one tag - confirm against real entries.
      sub(/<sense id.*><etym lang="la" opt="n">/, vetySepA)
      sub(/<\/etym>, <trans opt="n">|<\/etym>\.—/, vetySepB)
      sub(/(<\/etym>\. |<\/etym>\. —<\/sense>)/, "]")

      # Get rid of potential extra definition markers
      gsub(/(\.|<\/usg>) ?— ?<\/sense>/, ".")

      # Collapse all definition tags and add formatting in their place
      gsub(vdefTagIn "[^" vdefTagOut "]*" vdefTagOut, vdefSep)

      # Collapse all remaining tags
      gsub(tagIn "[^" tagOut "]*" tagOut, "")

      # Separate em-dash text
      # NOTE(review): /—\\,/ matches an em-dash followed by a literal
      # backslash and a comma - confirm that this is the intended guard.
      if ((!/—\\,/) && (!/[[:alnum:]]—/) && (!/ —/)) gsub(/—/, vemSep)
      if ((!/—\\,/)) gsub(/\.—/, "." vemSep)
      gsub(/ — ?/, vemSep)

      # Remove spaces from left and right of certain characters
      gsub(/ +/, " ")
      gsub(/ ,/, ",")
      gsub(/\( /, "(")
      gsub(/ \)/, ")")
      gsub(/ \./, ".")
      gsub(/ :/, ":")
      gsub(/ \?/, "?")
      gsub(/‘ /, "‘")
      gsub(/ ’/, "’")
      gsub(/^ /, "")
      gsub(/\.\.\. /, "...")

      print "\n" $0 "\n"
    }
  ' "$XMLdir$XMLfile"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment