Skip to content

Instantly share code, notes, and snippets.

@river24
Created March 7, 2013 10:18
Show Gist options
  • Save river24/5107017 to your computer and use it in GitHub Desktop.
Save river24/5107017 to your computer and use it in GitHub Desktop.
IEICEの2013年03月の総合大会のProceedingsを僕好みにするシェルスクリプト
#!/bin/sh
# ---- User settings: begin ----
# Copy the contents of the Proceedings to your hard disk, then point ROOT_DIR
# at the "Settings" directory inside that copy.
ROOT_DIR="/Users/river24/Desktop/EIC2013G/Settings"
# Directory that receives the temporary files written while processing.
TMP_DIR="/tmp"
# ---- User settings: end ----

# Scratch files generated during processing; all of them live in TMP_DIR.
TMP_HTML="$TMP_DIR/tmp.html"
PDF_LIST="$TMP_DIR/pdf.list"
PAGE_LIST="$TMP_DIR/page.list"
LINK_LIST="$TMP_DIR/link.list"
SESSION_LIST="$TMP_DIR/session.list"
PAPER_LIST="$TMP_DIR/paper.list"
# Work from inside the configured directory. Abort when it cannot be entered:
# everything below uses relative paths and includes an `rm -rf`, so carrying on
# from some other directory could destroy unrelated data.
if [ -z "${ROOT_DIR}" ] || ! cd "${ROOT_DIR}"
then
  echo "failed to change directory to '${ROOT_DIR}'." >&2
  exit 1
fi

# On the first run, keep an untouched copy of './html' as './html.original'.
# To undo everything this script does, restore './html' from that copy.
if [ ! -e "./html.original" ]
then
  if [ ! -e "./html" ]
  then
    echo "'./html' directory is not found." >&2
    exit 1
  fi
  if ! cp -r "./html" "./html.original"
  then
    # Do not leave a partial backup behind: a later run would take it for the
    # untouched copy and replace './html' with it.
    rm -rf "./html.original"
    echo "failed to backup './html' directory as './html.original'." >&2
    exit 1
  fi
fi

# Always start from the untouched copy, so that running the script again does
# not apply its modifications on top of already modified pages.
rm -rf "./html"
if ! cp -r "./html.original" "./html"
then
  echo "failed to restore './html' from './html.original'." >&2
  exit 1
fi

if [ ! -e "./html" ]
then
  echo "'./html' directory is not found." >&2
  exit 1
fi
if [ -e ${PDF_LIST} ]
then
rm "${PDF_LIST}"
fi
find "./pdf" -type f | egrep "[^\/]+\.pdf" | perl -pe 's/^.*\/([^\/]+\.pdf).*$/$1/g' | sort | uniq > ${PDF_LIST}
find "./html" -type f | egrep "[^\/]+\.html" | xargs chmod 644
find "./html" -type f | egrep "[^\/]+\.html" |
while read HTML
do
cat ${HTML} | perl -pe 's/<\/td>/<\/td>\n/g' | perl -pe 's/<tr>/<tr>\n/g' | perl -pe 's/<\/tr>/<\/tr>\n/g' > ${TMP_HTML}
cp ${TMP_HTML} ${HTML}
done
for PDF in `cat ${PDF_LIST}`
do
echo ${PDF}
if [ -e ${PAGE_LIST} ]
then
rm "${PAGE_LIST}"
fi
find "./html" -type f | egrep "[^\/]+\.html" | xargs grep "${PDF}" | perl -pe 's/^([^:]+):.*(<a [^>]*href=\"[^\"]*${PDF}\"[^>]*>.+<\/a>).*$/$1:$2/g' > ${PAGE_LIST}
if [ -e ${PAGE_LIST} ]
then
if [ -e ${LINK_LIST} ]
then
rm "${LINK_LIST}"
fi
touch ${LINK_LIST}
echo '<form><select style="width: 60px; font-size: 8px;" onChange="location.href=value;"><option value="#">▼Link</option>' >> ${LINK_LIST}
cat ${PAGE_LIST} |
while read LINE
do
FLAG=`echo "${LINE}" | md5`
PAGE=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$1/g'`
TEXT=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$2/g' | perl -pe 's/^.*<a [^>]*>(.+)<\/a>.*$/$1/g' | perl -pe 's/<.*?>//g'`
echo "<option value=\"../.${PAGE}#${FLAG}\">${TEXT}</option>" >> ${LINK_LIST}
done
echo '</select></form>' >> ${LINK_LIST}
cat ${PAGE_LIST} |
while read LINE
do
FLAG=`echo "${LINE}" | md5`
PAGE=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$1/g'`
TARGET=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$2/g'`
LINKS=`cat ${LINK_LIST}`
cat ${PAGE} | perl -pe "s|\Q${TARGET}\E|${TARGET}<a name='${FLAG}' id='${FLAG}'>&nbsp;</a><br />${LINKS}|g" > ${TMP_HTML}
cp ${TMP_HTML} ${PAGE}
done
fi
done
cat "./html/program/schedule.html" | perl -pe 's|<br />\n|<br /><KAIGYOU>|g' | perl -pe 's|<hr />|\n <hr />\n |g' > ${TMP_HTML}
cp ${TMP_HTML} "./html/program/schedule.html"
egrep "[A-Z]+\-[0-9]+\. " "./html/program/schedule.html" > ${SESSION_LIST}
cat ${SESSION_LIST} |
while read SESSION
do
if [ -e $PAPER_LIST ]
then
rm $PAPER_LIST
fi
touch $PAPER_LIST
SESSION_ID=`echo " ${SESSION}" | perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*$|$1|g'`
FLAG=`echo ${SESSION} | md5`
SESSION_TYPE1=`echo ${SESSION} | egrep "<[0-9]+〜[0-9]+>" | wc -l`
if [ ${SESSION_TYPE1} -eq 1 ]
then
SESSION_START=`echo " ${SESSION}" | perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*<([0-9]+)〜([0-9]+)>.*$|$2|g'`
SESSION_END=`echo " ${SESSION}" | perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*<([0-9]+)〜([0-9]+)>.*$|$3|g'`
i=${SESSION_START}
while [ ${i} -le ${SESSION_END} ]
do
echo "${SESSION_ID}-${i}" >> ${PAPER_LIST}
i=`expr ${i} + 1`
done
else
SESSION_TYPE2=`echo ${SESSION} | egrep "<[0-9]+>" | wc -l`
if [ ${SESSION_TYPE2} -eq 1 ]
then
SESSION_START=`echo ${SESSION} | perl -pe 's|^(.)| $1|g' | perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*<([0-9]+)>.*$|$2|g'`
echo "${SESSION_ID}-${SESSION_START}" >> ${PAPER_LIST}
else
echo ${SESSION_ID} >> ${PAPER_LIST}
fi
fi
cat ${PAPER_LIST} |
while read PAPER_ID
do
find "./html" -type f | egrep "[^\/]+\.html" | xargs egrep "<option .*>${PAPER_ID}</option>" |
while read LINE
do
PAGE=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$1/g'`
TARGET=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$2/g'`
cat ${PAGE} | perl -pe "s|${TARGET}$|${TARGET}<option value='../../html/program/schedule.html#${FLAG}'>■スケジュール一覧で確認■</option>|g" > ${TMP_HTML}
cp ${TMP_HTML} ${PAGE}
done
done
SESSION_TOP=`cat ${PAPER_LIST} | head -n 1`
find "./html" -type f | egrep "[^\/]+\.html" | xargs egrep "<option .*>${SESSION_TOP}</option>" | head -n 1 |
while read LINE
do
TARGET=`echo "${LINE}" | perl -pe 's/^([^:]+):(.*)$/$2/g'`
LINK=`echo ${TARGET} | perl -pe 's|^.* value="([^"]+)".*$|$1|g'`
NEW_SESSION=`echo "${SESSION}" | perl -pe "s|${SESSION_ID}|<a href='${LINK}' name='${FLAG}' id='${FLAG}'>${SESSION_ID}</a>|g"`
cat "./html/program/schedule.html" | perl -pe "s|${SESSION}|${NEW_SESSION}|g" > ${TMP_HTML}
cp ${TMP_HTML} "./html/program/schedule.html"
done
done
cat "./html/program/schedule.html" | perl -pe 's|<KAIGYOU>|\n|g' | perl -pe 's|^<td| <td|g' | perl -pe 's|^</tr>| </tr>|g' | perl -pe 's|^( <td class="bumon">&nbsp;</td>)$| $1|g' > ${TMP_HTML}
cp ${TMP_HTML} "./html/program/schedule.html"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment