Skip to content

Instantly share code, notes, and snippets.

@ayu-mushi
Last active April 14, 2022 00:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ayu-mushi/994b98ef5eb4f83161153196e18c9ec3 to your computer and use it in GitHub Desktop.
Save ayu-mushi/994b98ef5eb4f83161153196e18c9ec3 to your computer and use it in GitHub Desktop.
Detailed history pages for w3m cgi
#!/usr/bin/python
from tika import parser
import argparse
import os
import sys
def print_title(parsed, filename):
    """Print the document title stored in a parse result.

    Args:
        parsed: Mapping produced by ``parser.from_file``; the title is read
            from ``parsed['metadata']['title']``.
        filename: Path of the parsed document. Not used here; it is accepted
            because ``execute_for_each`` calls every handler as
            ``f(parsed, filename)``.

    A document without a title is reported on stderr instead of raising.
    """
    try:
        print(parsed['metadata']['title'])
    except (KeyError, TypeError):
        # TypeError covers a ``None`` metadata entry, mirroring how
        # print_abstract treats ``None`` content.
        # The trailing newline keeps consecutive messages on separate lines.
        sys.stderr.write("that document don't has title.\n")
def print_abstract(parsed, filename):
    """Print the first 100 characters of the document text, then a blank line.

    Args:
        parsed: Mapping produced by ``parser.from_file``; the text is read
            from ``parsed['content']``.
        filename: Path of the parsed document. Not used here; it is accepted
            because ``execute_for_each`` calls every handler as
            ``f(parsed, filename)``.

    Leading and trailing spaces and newlines are stripped before the text is
    truncated. Missing or non-string content is reported on stderr instead
    of raising.
    """
    try:
        print(parsed['content'].strip(" \n")[0:100])
    except (TypeError, KeyError, AttributeError):
        # One handler for every way the content can be unusable: ``parsed``
        # is not subscriptable, the key is absent, or the value is ``None``.
        sys.stderr.write("the content of that document is corrupt\n")
    # NOTE(review): the original indentation was lost; the separator line is
    # assumed to be printed unconditionally after each abstract.
    # ``print("")`` behaves the same on Python 2 and Python 3.
    print("")
def alls(parsed, filename):
    """Print the path, then the title and abstract sections of a document.

    Args:
        parsed: Parse result forwarded unchanged to ``print_title`` and
            ``print_abstract``.
        filename: Path of the document; printed and forwarded to both
            section printers.
    """
    print("path: " + filename)
    # Each section is a heading line followed by the matching printer.
    sections = (
        ("title:", print_title),
        ("abstract:", print_abstract),
    )
    for heading, show in sections:
        print(heading)
        show(parsed, filename)
def execute_for_each(f, args):
    """Parse every file named in ``args.filenames`` and pass it to ``f``.

    Args:
        f: Callable invoked as ``f(parsed, filename)`` for each file.
        args: Object with a ``filenames`` iterable of paths.
    """
    for path in args.filenames:
        f(parser.from_file(path), path)
def _handler_for(action):
    """Return an argparse callback that runs *action* on every file name."""
    # ``lambda args:`` replaces the Python 2-only ``lambda(args):`` spelling,
    # which is a syntax error on Python 3.
    return lambda args: execute_for_each(action, args)


# Command-line interface: every sub-command accepts zero or more file names
# and stores its callback in ``func``.
argparser = argparse.ArgumentParser(description='')
subparsers = argparser.add_subparsers(help='sub-command help')

pt = subparsers.add_parser("title", help="print title")
pt.add_argument("filenames", type=str, nargs='*')
pt.set_defaults(func=_handler_for(print_title))

pa = subparsers.add_parser("abst", help="print abstract")
pa.add_argument("filenames", type=str, nargs='*')
pa.set_defaults(func=_handler_for(print_abstract))

c_print_all = subparsers.add_parser("all", help="print all")
c_print_all.add_argument("filenames", type=str, nargs='*')
c_print_all.set_defaults(func=_handler_for(alls))

args = argparser.parse_args()
if not hasattr(args, "func"):
    # Python 3 argparse accepts a command line without a sub-command, which
    # leaves ``func`` unset; report a usage error instead of crashing with
    # AttributeError.
    argparser.error("a sub-command is required: title, abst or all")
args.func(args)
#!/usr/bin/env zsh
# Generate a detailed history page for w3m.
# Intended to be run as a w3m local CGI script.
# Print the first argument followed by an empty line.
# printf is used instead of echo because the zsh builtin echo interprets
# backslash sequences and a leading "-", which would mangle document text
# (titles, abstracts) passed through this function.
function echo1() {
  printf '%s\n\n' "$1"
}
# Escape the characters that are special in HTML so that text extracted from
# documents cannot be interpreted as markup.
function html_escape() {
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# CGI response header. echo1 appends the empty line that has to separate the
# header from the body.
echo1 "Content-type: text/html"
echo1 "<html>"

# Read the paging parameters from the CGI query string:
#   n    - number of most recent entries to skip
#   ext  - file extension of the history entries to list
#   size - number of entries per page
# Python takes QUERY_STRING from the environment; interpolating it into the
# program text would let a request inject Python code. Missing parameters
# fall back to the defaults used by the "html history" link below.
Q=$(python -c '
import os
try:
    from urllib.parse import parse_qs
except ImportError:  # Python 2
    from urlparse import parse_qs
q = parse_qs(os.environ.get("QUERY_STRING", ""))
for key, default in (("n", "0"), ("ext", "html"), ("size", "20")):
    print(q.get(key, [default])[0])
')
PARAMS=("${(@f)Q}")
N=${PARAMS[1]}
EXT=${PARAMS[2]}
VIEW_SIZE=${PARAMS[3]}

# The values end up in arithmetic expressions, a glob and the HTML output,
# so anything that is not a plain number / alphanumeric extension is
# replaced by its default.
[[ $N =~ '^[0-9]+$' ]] || N=0
[[ $VIEW_SIZE =~ '^[0-9]+$' ]] || VIEW_SIZE=20
[[ $EXT =~ '^[A-Za-z0-9]+$' ]] || EXT=html

echo1 "<title>$EXT history</title>"
echo1 "<h1>$EXT history</h1>"
echo1 "<ul>"

# History files, newest first ("om" orders by modification time like
# `ls -t`; "N" yields an empty list instead of an error when nothing
# matches). Globbing into an array keeps file names intact.
FILES=(~/.w3m/*.${EXT}(Nom))
FIRST=$((N + 1))
LAST=$((N + VIEW_SIZE))
for LOCATION in ${FILES[$FIRST,$LAST]}
do
  echo1 "<li>"
  case $EXT in
    html)
      # Take the text between <title> and </title> and drop the tags.
      TITLE=$(grep -B 6 -A 6 "<title>" "$LOCATION" | sed -n -e "/<title>/,/<\/title>/p" | sed -e "s/<[^>]*>//g")
      ;;
    *)
      TITLE=$(doctitle title "$LOCATION" | html_escape)
      ;;
  esac
  echo1 "<p><a href='$LOCATION'>Title: $TITLE</a></p>"
  echo1 "<p>Location: '$LOCATION'</p>"
  case $EXT in
    pdf)
      # First ten lines of the text of the first three pages.
      ABSTRACT=$(pdftotext -l 3 -enc UTF-8 "$LOCATION" - | head -10 | html_escape)
      ;;
    *)
      # Slurp the file, drop newlines and spaces, remove script and style
      # elements (non-greedy, so text between two elements survives), strip
      # the remaining tags and keep the first 100 characters.
      ABSTRACT=$(perl -0777 -pe 's/[\n ]//g; s/<script[\s\S]*?<\/script>//g; s/<style[\s\S]*?<\/style>//g; s/<.*?>//g' "$LOCATION" | awk '{print substr($0, 1, 100)}')
      ;;
  esac
  echo1 "<p>Abstract: $ABSTRACT</p>"
  echo1 "</li>"
done
echo1 "</ul>"

# Paging and shortcut links.
echo1 "<div><a href='?n=$((N + VIEW_SIZE))&ext=$EXT&size=$VIEW_SIZE'>next</a></div>"
echo1 "<div><a href='?n=0&size=7&ext=pdf'>pdf history</a></div>"
echo1 "<div><a href='?n=0&size=20&ext=html'>html history</a></div>"
echo1 "</html>"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment