Created
September 13, 2011 09:31
-
-
Save river24/1213484 to your computer and use it in GitHub Desktop.
IEICEの2011年度ソサイエティ大会のProceedingsを少し美味しくするシェルスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# ---- Begin user settings ----
# Copy the contents of the Proceedings to your hard disk, then set ROOT_DIR to
# the "Settings" directory inside that copy.
ROOT_DIR="/Users/river24/Desktop/EIC2011S/Settings"
# Directory that receives the temporary files written while processing.
TMP_DIR="/tmp"
# ---- End user settings ----
# Temporary files generated while processing.
PDF_LIST="${TMP_DIR}/pdf.list"
PAGE_LIST="${TMP_DIR}/page.list"
LINK_LIST="${TMP_DIR}/link.list"
TMP_HTML="${TMP_DIR}/tmp.html"
# Everything below works on paths relative to ROOT_DIR ("./html", "./pdf") and
# removes "./html", so stop immediately if the directory cannot be entered
# instead of running those steps in whatever directory we happen to be in.
if ! cd "$ROOT_DIR"; then
  echo "cannot change directory to '${ROOT_DIR}'." >&2
  exit 1
fi
# Before this script changes anything, './html' is backed up as
# './html.original' (on the first run only). To undo the changes made by this
# script, restore './html' from './html.original'.
if [ ! -e "./html.original" ]; then
  if [ ! -e "./html" ]; then
    echo "'./html' directory is not found." >&2
    exit 1
  fi
  if ! cp -r "./html" "./html.original"; then
    # Do not leave a partial copy behind: the next run would mistake it for
    # the pristine backup and rebuild './html' from incomplete data.
    rm -rf "./html.original"
    echo "failed to backup './html' directory as './html.original'." >&2
    exit 1
  fi
fi

# Always start from the pristine pages: drop the working copy and re-create it
# from the backup, so running the script twice does not stack its changes.
rm -rf "./html"
if ! cp -r "./html.original" "./html" || [ ! -e "./html" ]; then
  echo "'./html' directory is not found." >&2
  exit 1
fi
if [ -e $PDF_LIST ] | |
then | |
rm "$PDF_LIST" | |
fi | |
find "./pdf" -type f | egrep "[^\/]+\.pdf" | perl -pe 's/^.*\/([^\/]+\.pdf).*$/$1/g' | sort | uniq > $PDF_LIST | |
find "./html" -type f | grep "html" | xargs chmod 644 | |
find "./html" -type f | grep html | | |
while read HTML | |
do | |
cat ${HTML} | perl -pe 's/<\/td>/<\/td>\n/g' > ${TMP_HTML} | |
cp ${TMP_HTML} ${HTML} | |
done | |
for PDF in `cat $PDF_LIST` | |
do | |
echo $PDF | |
if [ -e $PAGE_LIST ] | |
then | |
rm "$PAGE_LIST" | |
fi | |
find "./html" -type f | grep html | xargs grep "$PDF" | perl -pe 's/^([^:]+):.*(<a [^>]*href=\"[^\"]*$PDF\"[^>]*>[^<]+<\/a>).*$/$1:$2/g' > $PAGE_LIST | |
if [ -e $PAGE_LIST ] | |
then | |
if [ -e $LINK_LIST ] | |
then | |
rm "$LINK_LIST" | |
fi | |
touch $LINK_LIST | |
echo '<form><select style="width: 60px; font-size: 8px;" onChange="location.href=value;"><option value="#">▼Link</option>' >> $LINK_LIST | |
cat $PAGE_LIST | | |
while read LINE | |
do | |
FLAG=`echo "$LINE" | md5` | |
PAGE=`echo "$LINE" | perl -pe 's/^([^:]+):(.*)$/$1/g'` | |
TEXT=`echo "$LINE" | perl -pe 's/^([^:]+):(.*)$/$2/g' | perl -pe 's/^.*<a [^>]*>([^<]+)<\/a>.*$/$1/g'` | |
echo "<option value=\"../.${PAGE}#${FLAG}\">${TEXT}</option>" >> $LINK_LIST | |
done | |
echo '</select></form>' >> $LINK_LIST | |
cat $PAGE_LIST | | |
while read LINE | |
do | |
FLAG=`echo "$LINE" | md5` | |
PAGE=`echo "$LINE" | perl -pe 's/^([^:]+):(.*)$/$1/g'` | |
TARGET=`echo "$LINE" | perl -pe 's/^([^:]+):(.*)$/$2/g'` | |
LINKS=`cat $LINK_LIST` | |
cat ${PAGE} | perl -pe "s|${TARGET}|${TARGET}<a name='${FLAG}' id='${FLAG}'> </a><br />${LINKS}|g" > ${TMP_HTML} | |
cp ${TMP_HTML} ${PAGE} | |
done | |
fi | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment