Skip to content

Instantly share code, notes, and snippets.

@bathtime
Last active February 26, 2018 23:32
Show Gist options
  • Save bathtime/0d4954a5c6275539bbe00da5ce2446df to your computer and use it in GitHub Desktop.
Save bathtime/0d4954a5c6275539bbe00da5ce2446df to your computer and use it in GitHub Desktop.
Lewis and Short Elementary Latin Dictionary
#!/bin/bash
# This program requires an xml dictionary file to run. If it is not on your machine,
# it will automatically be downloaded and stored in ~/.config/latin/.
# Name this file as 'latin' and run:
#
# $ chmod +x latin
#
# To run:
# $ ./latin amo
#
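# To look up an inflected form ("auto-decline"): its headword(s) are first
# resolved online via Perseus, then their dictionary entries are printed:
# $ ./latin -d amo
#
# To print only the online Perseus analysis (no dictionary lookup):
# $ ./latin -c amo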
#
# Where 'amo' is the term searched.
key=$2
URL="http://www.perseus.tufts.edu/hopper/morph?l=$key&la=la"
wFIN='<h4 class="la">'
wFOUT='</h4>'
wDefIn='<span class="lemma_definition">'
wDefOut='</span>'
wFormIn='<td class="la">'$key'</td>'
wFormOut='<td style="font-size: x-small">'
## Code which connects to perseus to attain 1st per. sg. (needed as key for xml file)
if [[ ("$1" == "-d") ]]; then
searchTerms=$(wget -q -O- "$URL" | mawk -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" \
' $0 ~ vWFIN,$0 ~ vWFOUT {printf substr($0,18, length($0)-22)"\n"; next;}')
elif [[ ("$1" == "-c") ]]; then
wget -q -O- "$URL" | mawk -v vDefIn="$wDefIn" -v vDefOut="$wDefOut" -v vFormIn="$wFormIn" -v vFormOut="$wFormOut" -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" \
' $0 ~ vWFIN,$0 ~ vWFOUT {printf "\n[ " substr($0,18, length($0)-22)" ]"; next;} $0 ~ vDefIn,$0 ~ vDefOut {{ if (!/>/) {{$1=$1}1; x+=1; print " "$0"";} }} $0 ~ vFormIn,$0 ~ vFormOut {{ if (!/td /) {{$1=$1}1; $0=substr($0,5, length($0)-9); print "-"$0; next;} } }'
else
searchTerms=$1
fi
if [ "$1" == "-c" ]; then
exit
fi
XMLfile=Perseus_text_1999.04.0060.xml
XMLdir=~/.config/latin/
XMLlink="http://www.perseus.tufts.edu/hopper/dltext?doc=Perseus:text:1999.04.0060"
if [ ! -e $XMLdir$XMLfile ]; then
echo "\nFile:" $XMLdir$XMLfile "not found.\n\nDownloading from" $XMLlink "...\n"
mkdir -p ~/.config/latin
# Trim DOS' CR's to make Linux compatible
wget -qO- $XMLlink | tr -d '\r' > $XMLdir$XMLfile
fi
for key in $searchTerms; do
keyIn='key="'$key'"' # Which tag shall be searched?
keyOut='</entry>' #
tagIn='<' # How are tags to be distinguished?
tagOut='>' #
defTagIn='<sense' # Ad
defTagOut='>'
keySepA='a' # Separates the main word from its roots
keySepB=',' #
etySepA='[' # Etymology left
etySepB=']\n\n • ' # Etymology right
defSep='\n\n ' # Separates individual definitions
emSep='\n\n • ' # Separates em-dashes
# First concatenate the result into a usable string else text is difficult to manipulate
awk -v vkeyIn="$keyIn" -v vkeyOut="$keyOut" -v vdefTagIn="$defTagIn" -v vdefTagOut="$defTagOut" -v tagIn="$tagIn" -v tagOut="$tagOut" -v vkeySepA="$keySepA" -v vkeySepB="$keySepB" -v vdefSep="$defSep" -v vetySepA="$etySepA" -v vetySepB="$etySepB" -v vemSep="$emSep" '
$0 ~ vkeyIn, $0 ~ vkeyOut { WRK = WRK $0; next; }END{
$0 = WRK
# Separation after main key word
sub(/<orth>/, vkeySepA)
sub(/<\/orth>/, vkeySepB)
# Add missing dot after gender
gsub(/<\/gen>/, ". ")
# Add separation for several variations of definitions
sub(/<sense id.*><etym lang="la" opt="n">/, vetySepA)
sub(/<\/etym>\, <trans opt="n">|<\/etym>\.—/, vetySepB)
sub(/(<\/etym>\. |<\/etym>\. —<\/sense>)/, "]")
# Get rid of potential extra definition markers
gsub (/(\.|<\/usg>) ?— ?<\/sense>/, ".")
# Collapse all definition tags and add formatting in their place
gsub(vdefTagIn "[^" vdefTagOut "]*" vdefTagOut, vdefSep)
# Collapse all remaining tags
gsub(tagIn "[^" tagOut "]*" tagOut, "")
# Separate em-dash text
if ((!/—\\,/) && (!/[[:alnum:]]—/) && (!/ —/)) gsub (/—/, vemSep)
if ((!/—\\,/) ) gsub (/\.—/, "." vemSep)
gsub (/ — ?/, vemSep)
# Remove spaces from left and right of certain characters
gsub(/ +/, " ")
gsub(/ ,/, ",")
gsub(/\( /, "(")
gsub(/ \)/, ")")
gsub(/ \./, ".")
gsub(/ \:/, ":")
gsub(/ \?/, "?")
gsub(/\‘ /, "‘")
gsub(/ \’/, "’")
gsub(/^ /, "" )
gsub(/\.\.\. /, "...")
print "\n" $0 "\n"
} ' $XMLdir$XMLfile
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment