bathtime/latin.sh

## latin.sh
#!/bin/bash

# This program requires an xml dictionary file to run. If it is not on your machine,
# it will automatically be downloaded and stored in ~/.config/latin/.

# Name this file as 'latin' and run:
#
# $ chmod +x latin
#
# To run:
# $ ./latin amo
#
# To enable internet auto-decline:
# $ ./latin -d amo
#
# To run with only auto-decline:
# $ ./latin -c amo
#
# Where 'amo' is the term searched.

# Word sent to Perseus: the second command-line argument (used by -d and -c).
key="${2}"

# Perseus morphology query for that word.
URL="http://www.perseus.tufts.edu/hopper/morph?l=${key}&la=la"

# Start/end markers handed to awk as range patterns over the Perseus page.
wFIN="<h4 class=\"la\">"	# heading line whose text is extracted
wFOUT="</h4>"
wDefIn="<span class=\"lemma_definition\">"	# definition span
wDefOut="</span>"
wFormIn="<td class=\"la\">${key}</td>"	# table cell containing the searched word
wFormOut="<td style=\"font-size: x-small\">"

## Code which connects to Perseus to obtain the 1st per. sg. (needed as key for xml file)

# perseus_headwords
#   Filter: reads a Perseus morphology page on stdin and writes the text of
#   every line lying between the $wFIN and $wFOUT markers, one per line.
#   substr() drops the first 17 and the last 5 characters of such a line
#   (presumably indentation plus the opening tag, and the closing tag).
# Globals: wFIN, wFOUT (read)
perseus_headwords() {
	awk -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" '
		$0 ~ vWFIN, $0 ~ vWFOUT {
			# Explicit "%s" so a % in the page is printed, not parsed as a format
			printf "%s\n", substr($0, 18, length($0) - 22)
			next
		}
	'
}

# perseus_summary
#   Filter: reads a Perseus morphology page on stdin and writes, for each
#   headword, "[ word ]" followed by the tag-free lines of its definition
#   span and one "-..." line per table row found between $wFormIn and
#   $wFormOut.
# Globals: wFIN, wFOUT, wDefIn, wDefOut, wFormIn, wFormOut (read)
perseus_summary() {
	awk -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" \
		-v vDefIn="$wDefIn" -v vDefOut="$wDefOut" \
		-v vFormIn="$wFormIn" -v vFormOut="$wFormOut" '
		# Headword on a fresh line, left open so the definition follows it
		$0 ~ vWFIN, $0 ~ vWFOUT {
			printf "\n[ %s ]", substr($0, 18, length($0) - 22)
			next
		}
		# Definition: only lines without any tag; $1 = $1 squeezes whitespace
		$0 ~ vDefIn, $0 ~ vDefOut {
			if (!/>/) { $1 = $1; print " " $0 }
		}
		# Form rows: skip cells with attributes, strip the 4 leading and
		# 5 trailing characters of the rest (the bare cell tags)
		$0 ~ vFormIn, $0 ~ vFormOut {
			if (!/td /) {
				$1 = $1
				$0 = substr($0, 5, length($0) - 9)
				print "-" $0
				next
			}
		}
	'
}

case "$1" in
	-d)
		# Let Perseus resolve the typed form to its dictionary headword(s)
		searchTerms=$(wget -q -O- "$URL" | perseus_headwords)
		;;
	-c)
		# Online summary only: nothing is looked up in the local dictionary
		wget -q -O- "$URL" | perseus_summary
		exit 0
		;;
	*)
		searchTerms=$1
		;;
esac

# Local copy of the XML dictionary and where to fetch it from.
XMLfile=Perseus_text_1999.04.0060.xml
XMLdir=~/.config/latin/
XMLlink="http://www.perseus.tufts.edu/hopper/dltext?doc=Perseus:text:1999.04.0060"

# Fetch the dictionary when it is missing. -s (not -e) also re-fetches an
# empty file, which is what an interrupted or failed download leaves behind.
if [[ ! -s "$XMLdir$XMLfile" ]]; then
	# printf, because bash's echo would print the \n sequences literally
	printf '\nFile: %s not found.\n\nDownloading from %s ...\n\n' \
		"$XMLdir$XMLfile" "$XMLlink" >&2
	mkdir -p "$XMLdir" || exit 1

	# Download next to the final location and rename only on success, so a
	# failed transfer never masquerades as a valid dictionary.
	tmpXML=$(mktemp "${XMLdir}${XMLfile}.XXXXXX") || exit 1

	# Strip DOS carriage returns to make the file Linux compatible
	wget -qO- "$XMLlink" | tr -d '\r' > "$tmpXML"
	# PIPESTATUS[0] is wget's status; the pipeline's own status is only tr's
	if [[ ${PIPESTATUS[0]} -ne 0 || ! -s "$tmpXML" ]]; then
		rm -f -- "$tmpXML"
		printf 'latin: download of %s failed\n' "$XMLlink" >&2
		exit 1
	fi
	mv -f -- "$tmpXML" "$XMLdir$XMLfile" || exit 1
fi

# lookup_entry KEY XMLFILE
#   Prints, as plain text, the entry of XMLFILE that starts on a line matching
#   key="KEY" and ends on a line matching </entry>. Tags are removed and
#   separators are inserted between the headword, etymology and definitions.
#   KEY is matched as an awk regular expression, as it always was.
# Arguments: $1 - dictionary key, $2 - path of the XML dictionary
# Outputs:   the formatted entry on stdout; a note on stderr when nothing matched
# Returns:   awk's exit status (1 when no entry matched)
lookup_entry() {
	local key="$1" xmlFile="$2"

	local keyIn="key=\"${key}\""	# Which tag shall be searched?
	local keyOut='</entry>'	# Where does the entry end?
	local tagIn='<'	# How are tags to be distinguished?
	local tagOut='>'
	local defTagIn='<sense'	# Opening of a definition tag
	local defTagOut='>'
	# NOTE(review): the literal 'a' below is inserted in place of <orth>, so the
	# headword is printed with an extra leading "a" - confirm this is intended.
	local keySepA='a'	# Separates the main word from its roots
	local keySepB=','
	local etySepA='['	# Etymology left
	local etySepB=']\n\n • '	# Etymology right
	local defSep='\n\n '	# Separates individual definitions
	local emSep='\n\n • '	# Separates em-dashes

	# awk -v expands the \n sequences in the separators above into newlines.
	awk -v vkey="$key" -v vkeyIn="$keyIn" -v vkeyOut="$keyOut" \
		-v vdefTagIn="$defTagIn" -v vdefTagOut="$defTagOut" \
		-v tagIn="$tagIn" -v tagOut="$tagOut" \
		-v vkeySepA="$keySepA" -v vkeySepB="$keySepB" \
		-v vdefSep="$defSep" -v vetySepA="$etySepA" -v vetySepB="$etySepB" \
		-v vemSep="$emSep" '
	# First concatenate the entry into one string, else the text is difficult to manipulate
	$0 ~ vkeyIn, $0 ~ vkeyOut { WRK = WRK $0; next }

	END {
		if (WRK == "") {
			print "latin: no entry found for \"" vkey "\"" > "/dev/stderr"
			exit 1
		}

		$0 = WRK

		# Separation after main key word
		sub(/<orth>/, vkeySepA)
		sub(/<\/orth>/, vkeySepB)

		# Add missing dot after gender
		gsub(/<\/gen>/, ". ")

		# Add separation for several variations of definitions
		sub(/<sense id.*><etym lang="la" opt="n">/, vetySepA)
		sub(/<\/etym>, <trans opt="n">|<\/etym>\.—/, vetySepB)
		sub(/(<\/etym>\. |<\/etym>\. —<\/sense>)/, "]")

		# Get rid of potential extra definition markers
		gsub(/(\.|<\/usg>) ?— ?<\/sense>/, ".")

		# Collapse all definition tags and add formatting in their place
		gsub(vdefTagIn "[^" vdefTagOut "]*" vdefTagOut, vdefSep)

		# Collapse all remaining tags
		gsub(tagIn "[^" tagOut "]*" tagOut, "")

		# Separate em-dash text
		if ((!/—\\,/) && (!/[[:alnum:]]—/) && (!/ —/)) gsub(/—/, vemSep)
		if (!/—\\,/) gsub(/\.—/, "." vemSep)
		gsub(/ — ?/, vemSep)

		# Remove spaces from left and right of certain characters
		gsub(/ +/, " ")

		gsub(/ ,/, ",")
		gsub(/\( /, "(")
		gsub(/ \)/, ")")
		gsub(/ \./, ".")
		gsub(/ :/, ":")
		gsub(/ \?/, "?")
		gsub(/‘ /, "‘")
		gsub(/ ’/, "’")
		gsub(/^ /, "")
		gsub(/\.\.\. /, "...")

		print "\n" $0 "\n"
	}
	' "$xmlFile"
}

# $searchTerms is deliberately left unquoted: one lookup per whitespace-separated term.
# shellcheck disable=SC2086
for key in $searchTerms; do
	lookup_entry "$key" "$XMLdir$XMLfile"
done
	#!/bin/bash

	# This program requires an xml dictionary file to run. If it is not on your machine,
	# it will automatically be downloaded and stored in ~/.config/latin/.

	# Name this file as 'latin' and run:
	#
	# $ chmod +x latin
	#
	# To run:
	# $ ./latin amo
	#
	# To enable internet auto-decline:
	# $ ./latin -d amo
	#
	# To run with only auto-decline:
	# $ ./latin -c amo
	#
	# Where 'amo' is the term searched.

	# Word sent to Perseus: the second command-line argument (used by -d and -c).
	key="${2}"

	# Perseus morphology query for that word.
	URL="http://www.perseus.tufts.edu/hopper/morph?l=${key}&la=la"

	# Start/end markers handed to awk as range patterns over the Perseus page.
	wFIN="<h4 class=\"la\">"	# heading line whose text is extracted
	wFOUT="</h4>"
	wDefIn="<span class=\"lemma_definition\">"	# definition span
	wDefOut="</span>"
	wFormIn="<td class=\"la\">${key}</td>"	# table cell containing the searched word
	wFormOut="<td style=\"font-size: x-small\">"

	## Code which connects to Perseus to obtain the 1st per. sg. (needed as key for xml file)

	# perseus_headwords
	#   Filter: reads a Perseus morphology page on stdin and writes the text of
	#   every line lying between the $wFIN and $wFOUT markers, one per line.
	#   substr() drops the first 17 and the last 5 characters of such a line
	#   (presumably indentation plus the opening tag, and the closing tag).
	# Globals: wFIN, wFOUT (read)
	perseus_headwords() {
		awk -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" '
			$0 ~ vWFIN, $0 ~ vWFOUT {
				# Explicit "%s" so a % in the page is printed, not parsed as a format
				printf "%s\n", substr($0, 18, length($0) - 22)
				next
			}
		'
	}

	# perseus_summary
	#   Filter: reads a Perseus morphology page on stdin and writes, for each
	#   headword, "[ word ]" followed by the tag-free lines of its definition
	#   span and one "-..." line per table row found between $wFormIn and
	#   $wFormOut.
	# Globals: wFIN, wFOUT, wDefIn, wDefOut, wFormIn, wFormOut (read)
	perseus_summary() {
		awk -v vWFIN="$wFIN" -v vWFOUT="$wFOUT" \
			-v vDefIn="$wDefIn" -v vDefOut="$wDefOut" \
			-v vFormIn="$wFormIn" -v vFormOut="$wFormOut" '
			# Headword on a fresh line, left open so the definition follows it
			$0 ~ vWFIN, $0 ~ vWFOUT {
				printf "\n[ %s ]", substr($0, 18, length($0) - 22)
				next
			}
			# Definition: only lines without any tag; $1 = $1 squeezes whitespace
			$0 ~ vDefIn, $0 ~ vDefOut {
				if (!/>/) { $1 = $1; print " " $0 }
			}
			# Form rows: skip cells with attributes, strip the 4 leading and
			# 5 trailing characters of the rest (the bare cell tags)
			$0 ~ vFormIn, $0 ~ vFormOut {
				if (!/td /) {
					$1 = $1
					$0 = substr($0, 5, length($0) - 9)
					print "-" $0
					next
				}
			}
		'
	}

	# The page must reach the filter through a real (unescaped) pipe.
	case "$1" in
		-d)
			# Let Perseus resolve the typed form to its dictionary headword(s)
			searchTerms=$(wget -q -O- "$URL" | perseus_headwords)
			;;
		-c)
			# Online summary only: nothing is looked up in the local dictionary
			wget -q -O- "$URL" | perseus_summary
			exit 0
			;;
		*)
			searchTerms=$1
			;;
	esac

	# Local copy of the XML dictionary and where to fetch it from.
	XMLfile=Perseus_text_1999.04.0060.xml
	XMLdir=~/.config/latin/
	XMLlink="http://www.perseus.tufts.edu/hopper/dltext?doc=Perseus:text:1999.04.0060"

	# Fetch the dictionary when it is missing. -s (not -e) also re-fetches an
	# empty file, which is what an interrupted or failed download leaves behind.
	if [[ ! -s "$XMLdir$XMLfile" ]]; then
		# printf, because bash's echo would print the \n sequences literally
		printf '\nFile: %s not found.\n\nDownloading from %s ...\n\n' \
			"$XMLdir$XMLfile" "$XMLlink" >&2
		mkdir -p "$XMLdir" || exit 1

		# Download next to the final location and rename only on success, so a
		# failed transfer never masquerades as a valid dictionary.
		tmpXML=$(mktemp "${XMLdir}${XMLfile}.XXXXXX") || exit 1

		# Strip DOS carriage returns to make the file Linux compatible.
		# The two commands must be joined by a real (unescaped) pipe.
		wget -qO- "$XMLlink" | tr -d '\r' > "$tmpXML"
		# PIPESTATUS[0] is wget's status; the pipeline's own status is only tr's
		if [[ ${PIPESTATUS[0]} -ne 0 || ! -s "$tmpXML" ]]; then
			rm -f -- "$tmpXML"
			printf 'latin: download of %s failed\n' "$XMLlink" >&2
			exit 1
		fi
		mv -f -- "$tmpXML" "$XMLdir$XMLfile" || exit 1
	fi

	# lookup_entry KEY XMLFILE
	#   Prints, as plain text, the entry of XMLFILE that starts on a line matching
	#   key="KEY" and ends on a line matching </entry>. Tags are removed and
	#   separators are inserted between the headword, etymology and definitions.
	#   KEY is matched as an awk regular expression, as it always was.
	# Arguments: $1 - dictionary key, $2 - path of the XML dictionary
	# Outputs:   the formatted entry on stdout; a note on stderr when nothing matched
	# Returns:   awk's exit status (1 when no entry matched)
	lookup_entry() {
		local key="$1" xmlFile="$2"

		local keyIn="key=\"${key}\""	# Which tag shall be searched?
		local keyOut='</entry>'	# Where does the entry end?
		local tagIn='<'	# How are tags to be distinguished?
		local tagOut='>'
		local defTagIn='<sense'	# Opening of a definition tag
		local defTagOut='>'
		# NOTE(review): the literal 'a' below is inserted in place of <orth>, so the
		# headword is printed with an extra leading "a" - confirm this is intended.
		local keySepA='a'	# Separates the main word from its roots
		local keySepB=','
		local etySepA='['	# Etymology left
		local etySepB=']\n\n • '	# Etymology right
		local defSep='\n\n '	# Separates individual definitions
		local emSep='\n\n • '	# Separates em-dashes

		# awk -v expands the \n sequences in the separators above into newlines.
		awk -v vkey="$key" -v vkeyIn="$keyIn" -v vkeyOut="$keyOut" \
			-v vdefTagIn="$defTagIn" -v vdefTagOut="$defTagOut" \
			-v tagIn="$tagIn" -v tagOut="$tagOut" \
			-v vkeySepA="$keySepA" -v vkeySepB="$keySepB" \
			-v vdefSep="$defSep" -v vetySepA="$etySepA" -v vetySepB="$etySepB" \
			-v vemSep="$emSep" '
		# First concatenate the entry into one string, else the text is difficult to manipulate
		$0 ~ vkeyIn, $0 ~ vkeyOut { WRK = WRK $0; next }

		END {
			if (WRK == "") {
				print "latin: no entry found for \"" vkey "\"" > "/dev/stderr"
				exit 1
			}

			$0 = WRK

			# Separation after main key word
			sub(/<orth>/, vkeySepA)
			sub(/<\/orth>/, vkeySepB)

			# Add missing dot after gender
			gsub(/<\/gen>/, ". ")

			# Add separation for several variations of definitions
			# (the | in these patterns is regex alternation and must stay unescaped)
			sub(/<sense id.*><etym lang="la" opt="n">/, vetySepA)
			sub(/<\/etym>, <trans opt="n">|<\/etym>\.—/, vetySepB)
			sub(/(<\/etym>\. |<\/etym>\. —<\/sense>)/, "]")

			# Get rid of potential extra definition markers
			gsub(/(\.|<\/usg>) ?— ?<\/sense>/, ".")

			# Collapse all definition tags and add formatting in their place
			gsub(vdefTagIn "[^" vdefTagOut "]*" vdefTagOut, vdefSep)

			# Collapse all remaining tags
			gsub(tagIn "[^" tagOut "]*" tagOut, "")

			# Separate em-dash text
			if ((!/—\\,/) && (!/[[:alnum:]]—/) && (!/ —/)) gsub(/—/, vemSep)
			if (!/—\\,/) gsub(/\.—/, "." vemSep)
			gsub(/ — ?/, vemSep)

			# Remove spaces from left and right of certain characters
			gsub(/ +/, " ")

			gsub(/ ,/, ",")
			gsub(/\( /, "(")
			gsub(/ \)/, ")")
			gsub(/ \./, ".")
			gsub(/ :/, ":")
			gsub(/ \?/, "?")
			gsub(/‘ /, "‘")
			gsub(/ ’/, "’")
			gsub(/^ /, "")
			gsub(/\.\.\. /, "...")

			print "\n" $0 "\n"
		}
		' "$xmlFile"
	}

	# $searchTerms is deliberately left unquoted: one lookup per whitespace-separated term.
	# shellcheck disable=SC2086
	for key in $searchTerms; do
		lookup_entry "$key" "$XMLdir$XMLfile"
	done