Created
March 7, 2013 10:18
-
-
Save river24/5107017 to your computer and use it in GitHub Desktop.
IEICEの2013年03月の総合大会のProceedingsを僕好みにするシェルスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh

# ---- Settings start here ----
# Copy the contents of the Proceedings to your hard disk, then set the path of
# the "Settings" directory inside that copy below.
ROOT_DIR="/Users/river24/Desktop/EIC2013G/Settings"
# Directory that receives the temporary files written during processing.
TMP_DIR="/tmp"
# ---- Settings end here ----

# Temporary files generated during processing.
PDF_LIST="${TMP_DIR}/pdf.list"
PAGE_LIST="${TMP_DIR}/page.list"
LINK_LIST="${TMP_DIR}/link.list"
TMP_HTML="${TMP_DIR}/tmp.html"
SESSION_LIST="${TMP_DIR}/session.list"
PAPER_LIST="${TMP_DIR}/paper.list"

# Every path used below is relative to ROOT_DIR. Stop right here if it cannot
# be entered: the following steps delete and rewrite "./html" relative to the
# current directory, which must never happen in some unrelated directory.
cd "${ROOT_DIR}" || exit 1
# Back up the './html' directory, as it was before this script modified it,
# to './html.original'. To undo the changes made by this script, restore
# './html' from that backup.
if [ ! -e "./html.original" ]; then
  if [ ! -e "./html" ]; then
    echo "'./html' directory is not found." >&2
    exit 1
  fi
  if ! cp -r "./html" "./html.original"; then
    # Do not leave a partial backup behind: a later run would take it for the
    # pristine copy and replace the real './html' with it.
    rm -rf "./html.original"
    echo "failed to backup './html' directory as './html.original'." >&2
    exit 1
  fi
fi

# Rebuild './html' from the backup so that every run starts from the
# unmodified pages.
if [ ! -e "./html.original" ]; then
  echo "failed to backup './html' directory as './html.original'." >&2
  exit 1
fi
rm -rf "./html"
if ! cp -r "./html.original" "./html" || [ ! -e "./html" ]; then
  echo "'./html' directory is not found." >&2
  exit 1
fi
# PDF_LIST: the base names of all files under './pdf' whose path contains
# '.pdf', sorted and de-duplicated, one name per line.
rm -f "${PDF_LIST}"
find "./pdf" -type f |
  grep -E "[^\/]+\.pdf" |
  perl -pe 's/^.*\/([^\/]+\.pdf).*$/$1/g' |
  sort -u > "${PDF_LIST}"
# Give every HTML page under './html' mode 644 so it can be overwritten, then
# rewrite it with a line break after each '</td>', '<tr>' and '</tr>'. The
# later steps match these pages line by line with grep/perl.
find "./html" -type f | grep -E "[^\/]+\.html" |
  while IFS= read -r HTML; do
    chmod 644 "${HTML}"
    # Only replace the page when the rewrite itself succeeded.
    perl -pe 's/<\/td>/<\/td>\n/g; s/<tr>/<tr>\n/g; s/<\/tr>/<\/tr>\n/g' \
      < "${HTML}" > "${TMP_HTML}" &&
      cp "${TMP_HTML}" "${HTML}"
  done
# For every PDF named in PDF_LIST: find each HTML line that links to it, then
# insert right after each of those links a named anchor and a drop-down menu
# listing every page that links to the same PDF.
#
# The list is read on file descriptor 3 so that nothing inside the loop can
# consume it through stdin.
while IFS= read -r PDF <&3; do
  [ -n "${PDF}" ] || continue
  printf '%s\n' "${PDF}"

  # PAGE_LIST: one "<page path>:<anchor element>" line per link to this PDF.
  #  * grep -F matches the file name literally ('.' is not a wildcard);
  #    /dev/null forces the "<file>:" prefix even for a single input file.
  #  * The file name reaches perl through the environment and is quoted with
  #    \Q...\E; a shell "${PDF}" inside the single-quoted perl script would
  #    never be expanded and perl would see an empty variable instead.
  find "./html" -type f | grep -E "[^\/]+\.html" |
    xargs grep -F -e "${PDF}" /dev/null |
    PDF="${PDF}" perl -pe 's/^([^:]+):.*(<a [^>]*href=\"[^\"]*\Q$ENV{PDF}\E\"[^>]*>.+<\/a>).*$/$1:$2/g' \
      > "${PAGE_LIST}"

  # No page links to this PDF: nothing to insert.
  [ -s "${PAGE_LIST}" ] || continue

  # LINK_LIST: the drop-down menu, one <option> per linking page. The option
  # value is the page path plus an anchor id: the MD5 of the PAGE_LIST line,
  # computed with `md5` as shipped with macOS/BSD.
  {
    printf '%s\n' '<form><select style="width: 60px; font-size: 8px;" onChange="location.href=value;"><option value="#">▼Link</option>'
    while IFS= read -r LINE; do
      FLAG=$(printf '%s\n' "${LINE}" | md5)
      PAGE=${LINE%%:*}
      # Option label: the text inside the anchor with any nested tags removed.
      TEXT=$(printf '%s\n' "${LINE#*:}" |
        perl -pe 's/^.*<a [^>]*>(.+)<\/a>.*$/$1/g; s/<.*?>//g')
      printf '%s\n' "<option value=\"../.${PAGE}#${FLAG}\">${TEXT}</option>"
    done < "${PAGE_LIST}"
    printf '%s\n' '</select></form>'
  } > "${LINK_LIST}"

  # Append the anchor (same id as used in the menu) and the menu after every
  # occurrence of the link in its page. TARGET and the replacement text are
  # handed to perl through the environment so that characters such as '|',
  # '$', '@' or '\' in the page text cannot alter the perl program.
  LINKS=$(cat "${LINK_LIST}")
  while IFS= read -r LINE; do
    FLAG=$(printf '%s\n' "${LINE}" | md5)
    PAGE=${LINE%%:*}
    TARGET=${LINE#*:}
    TARGET="${TARGET}" \
      REPLACEMENT="${TARGET}<a name='${FLAG}' id='${FLAG}'> </a><br />${LINKS}" \
      perl -pe 's|\Q$ENV{TARGET}\E|$ENV{REPLACEMENT}|g' \
      < "${PAGE}" > "${TMP_HTML}" &&
      cp "${TMP_HTML}" "${PAGE}"
  done < "${PAGE_LIST}"
done 3< "${PDF_LIST}"
# Prepare the schedule page for line-by-line session matching: join every line
# ending in '<br />' with the next one, marking the seam with '<KAIGYOU>'
# ("line break") so it can be turned back into a newline afterwards, and put
# each '<hr />' on a line of its own.
if [ ! -f "./html/program/schedule.html" ]; then
  # Without this check the steps below would create an empty schedule page.
  echo "'./html/program/schedule.html' is not found." >&2
  exit 1
fi
perl -pe 's|<br />\n|<br /><KAIGYOU>|g' < "./html/program/schedule.html" |
  perl -pe 's|<hr />|\n <hr />\n |g' > "${TMP_HTML}" &&
  cp "${TMP_HTML}" "./html/program/schedule.html"

# SESSION_LIST: the schedule lines that contain a session heading such as
# "A-1. " (capital letters, hyphen, digits, period, space).
grep -E "[A-Z]+\-[0-9]+\. " "./html/program/schedule.html" > "${SESSION_LIST}"
# Cross-link the schedule page and the other pages, one session (one line of
# SESSION_LIST) at a time:
#   * every line holding an <option> labelled with one of the session's paper
#     IDs gets an extra option that jumps to the session's anchor in the
#     schedule page;
#   * every occurrence of the session ID on the schedule line becomes a link
#     (carrying that anchor) to the target of the first option found for the
#     session's first paper.
#
# All page text reaches perl through the environment and is matched literally
# (\Q...\E) instead of being pasted into the perl program, so characters such
# as '(', '?', '|', '$' or '@' in a heading can neither break the match nor be
# executed as perl code.
#
# The session list is read on file descriptor 3 and the paper list on 4 so
# that no command inside the loops can consume them through stdin.
while IFS= read -r SESSION <&3; do
  # The session ID is the last "<letters>-<digits>." on the line; the leading
  # space guarantees a non-capital character in front of an ID at column 0.
  SESSION_ID=$(printf '%s\n' " ${SESSION}" |
    perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*$|$1|g')
  # Anchor id of this session: MD5 of the schedule line (macOS/BSD `md5`).
  FLAG=$(printf '%s\n' "${SESSION}" | md5)

  # PAPER_LIST: the paper IDs of this session. The heading carries either a
  # range "<first〜last>", a single number "<n>", or no number at all, in which
  # case the session ID itself is used.
  if printf '%s\n' "${SESSION}" | grep -Eq "<[0-9]+〜[0-9]+>"; then
    SESSION_START=$(printf '%s\n' " ${SESSION}" |
      perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*<([0-9]+)〜([0-9]+)>.*$|$2|g')
    SESSION_END=$(printf '%s\n' " ${SESSION}" |
      perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*<([0-9]+)〜([0-9]+)>.*$|$3|g')
    i=${SESSION_START}
    while [ "${i}" -le "${SESSION_END}" ]; do
      printf '%s\n' "${SESSION_ID}-${i}"
      # expr (not $((...))) so that numbers with leading zeros stay decimal.
      i=$(expr "${i}" + 1)
    done > "${PAPER_LIST}"
  elif printf '%s\n' "${SESSION}" | grep -Eq "<[0-9]+>"; then
    SESSION_START=$(printf '%s\n' " ${SESSION}" |
      perl -pe 's|^.*[^A-Z]+([A-Z]+\-[0-9]+)\..*<([0-9]+)>.*$|$2|g')
    printf '%s\n' "${SESSION_ID}-${SESSION_START}" > "${PAPER_LIST}"
  else
    printf '%s\n' "${SESSION_ID}" > "${PAPER_LIST}"
  fi

  # Append a "check in the schedule list" option after every line that ends
  # with an option labelled with one of the session's paper IDs.
  while IFS= read -r PAPER_ID <&4; do
    # Collect all matches before touching any page, so grep never reads a
    # file that is being rewritten at the same moment.
    MATCHES=$(find "./html" -type f | grep -E "[^\/]+\.html" |
      xargs grep -E -e "<option .*>${PAPER_ID}</option>" /dev/null)
    [ -n "${MATCHES}" ] || continue
    printf '%s\n' "${MATCHES}" |
      while IFS= read -r LINE; do
        PAGE=${LINE%%:*}
        TARGET=${LINE#*:}
        TARGET="${TARGET}" \
          REPLACEMENT="${TARGET}<option value='../../html/program/schedule.html#${FLAG}'>■スケジュール一覧で確認■</option>" \
          perl -pe 's|\Q$ENV{TARGET}\E$|$ENV{REPLACEMENT}|g' \
          < "${PAGE}" > "${TMP_HTML}" &&
          cp "${TMP_HTML}" "${PAGE}"
      done
  done 4< "${PAPER_LIST}"

  # Turn the session ID in the schedule into a link. The link target is the
  # last double-quoted value="..." on the first option line found for the
  # session's first paper; the option appended above uses single quotes and
  # is therefore not picked up here.
  SESSION_TOP=$(head -n 1 "${PAPER_LIST}")
  LINE=$(find "./html" -type f | grep -E "[^\/]+\.html" |
    xargs grep -E -e "<option .*>${SESSION_TOP}</option>" /dev/null |
    head -n 1)
  if [ -n "${LINE}" ]; then
    TARGET=${LINE#*:}
    LINK=$(printf '%s\n' "${TARGET}" |
      perl -pe 's|^.* value="([^"]+)".*$|$1|g')
    NEW_SESSION=$(printf '%s\n' "${SESSION}" |
      SESSION_ID="${SESSION_ID}" \
        ANCHOR="<a href='${LINK}' name='${FLAG}' id='${FLAG}'>${SESSION_ID}</a>" \
        perl -pe 's|\Q$ENV{SESSION_ID}\E|$ENV{ANCHOR}|g')
    SESSION="${SESSION}" NEW_SESSION="${NEW_SESSION}" \
      perl -pe 's|\Q$ENV{SESSION}\E|$ENV{NEW_SESSION}|g' \
      < "./html/program/schedule.html" > "${TMP_HTML}" &&
      cp "${TMP_HTML}" "./html/program/schedule.html"
  fi
done 3< "${SESSION_LIST}"
# Undo the temporary line joining in the schedule page: turn every '<KAIGYOU>'
# marker back into a line break, then (in a second pass, because the restored
# lines must be seen individually) prefix lines starting with '<td' or '</tr>'
# with a space and give the empty "bumon" cell line one more.
# The page is only replaced when the whole pipeline succeeded.
perl -pe 's|<KAIGYOU>|\n|g' < "./html/program/schedule.html" |
  perl -pe 's|^<td| <td|g; s|^</tr>| </tr>|g; s|^( <td class="bumon"> </td>)$| $1|g' \
    > "${TMP_HTML}" &&
  cp "${TMP_HTML}" "./html/program/schedule.html"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment