Skip to content

Instantly share code, notes, and snippets.

@ayu-mushi
Last active July 2, 2017 12:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ayu-mushi/9ed7b4c489cc5f20b6825673ed4baddd to your computer and use it in GitHub Desktop.
Save ayu-mushi/9ed7b4c489cc5f20b6825673ed4baddd to your computer and use it in GitHub Desktop.
Comment for each web page written for w3m browser
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# printenvを使ってw3mでページ毎にコメントを記録できるようにする
# 今まで作ったURLと注釈ファイル名(タイトル、ダブったら数字を付ける)の組の列をjsonで保持 連想配列
# URLを渡すと注釈ファイルが開く
# keymap A EXEC_SHELL "w3m_commentary.py"
import lxml.html
import json
import os
import re
import subprocess
def readContent(path, encoding="utf-8"):
    """Return the entire text content of the file at *path*.

    Args:
        path: Path of the file to read.
        encoding: Text encoding used to decode the file (default UTF-8).

    Returns:
        The decoded file content as a single string.  Bytes that cannot be
        decoded with *encoding* are dropped (``errors="ignore"``).
    """
    # The context manager closes the handle even when read() raises.
    with open(path, encoding=encoding, errors="ignore") as f:
        return f.read()
def envvar(env, varname):  # get environment variable
    """Extract the value of *varname* from ``NAME=value`` formatted text.

    Args:
        env: Text holding one ``NAME=value`` entry per line.
        varname: Exact name of the variable to look up.

    Returns:
        The text after ``varname=`` up to the end of that line.

    Raises:
        IndexError: If no line starts with ``varname=``.
    """
    # Anchor at the start of a line and escape the name, so that looking up
    # "W3M_URL" cannot match a line such as "OLD_W3M_URL=...".
    match = re.search(r"^" + re.escape(varname) + r"=(.*)$", env, re.MULTILINE)
    if match is None:
        raise IndexError("environment variable %r not found" % (varname,))
    return match.group(1)
def read_srcfile(src_filename, encoding="utf-8"):
    """Return the text of a page source file, gunzipping it if needed.

    Args:
        src_filename: Path of the source file; a ``.gz`` extension (any
            letter case) selects transparent gzip decompression.
        encoding: Text encoding used to decode the content.

    Returns:
        The decoded content as a string.  Undecodable bytes are dropped
        (``errors="ignore"``).
    """
    import gzip

    # os.path.splitext always keeps the leading dot, so only ".gz" can match.
    ext = os.path.splitext(src_filename)[1]
    opener = gzip.open if ext.lower() == ".gz" else open
    # Both openers accept the same text-mode arguments; the context manager
    # closes the handle even when read() raises.
    with opener(src_filename, "rt", encoding=encoding, errors="ignore") as f:
        return f.read()
def gettitle(srcfile, default="untitled"):
    """Return the text of the first ``<title>`` element of an HTML document.

    Args:
        srcfile: HTML source of the page as a string.
        default: Value returned when the document has no ``<title>`` element
            or the element has no text.

    Returns:
        The title text, or *default* when no usable title exists.
    """
    # NOTE(review): the source is handed to lxml as bytes, presumably so that
    # documents carrying an encoding declaration are accepted -- confirm.
    dom = lxml.html.fromstring(srcfile.encode("utf-8"))
    titles = dom.xpath("//title")
    # A missing <title> yields an empty list, and an empty one has text None;
    # callers concatenate the result, so fall back to a real string.
    if not titles or not titles[0].text:
        return default
    return titles[0].text
def getlength(w3m_url):
    """Return the number of characters in ``w3m -dump``'s output for a URL.

    Args:
        w3m_url: URL (or path) handed to ``w3m -dump``.

    Returns:
        Length of the dumped text, in characters.

    Raises:
        subprocess.CalledProcessError: If ``w3m`` exits with a non-zero status.
    """
    # Pass an argument list instead of a shell string: characters such as
    # "&", ";" or "$" in the URL would otherwise be interpreted by the shell
    # (truncating the URL or running arbitrary commands).
    raw = subprocess.check_output(["w3m", "-dump", w3m_url])
    # Undecodable bytes are dropped rather than aborting the whole run.
    return len(raw.decode(errors="ignore"))
# Replace characters that are problematic in a file name
def sub_for_more_filenameness(pagetitle):
    """Turn a page title into a Markdown file name.

    Every space, newline and slash in *pagetitle* becomes an underscore and
    the ``.md`` extension is appended.
    """
    unsafe_chars = re.compile(r'[\ \n/]')
    sanitized = unsafe_chars.sub("_", pagetitle)
    return sanitized + ".md"
# When a different URL already uses the same title, add a numeric prefix.
# Needed e.g. for taking notes on tweets, whose pages share a title.
def maketitle(pagetitle, url, comms):
    """Return a commentary file name for *url* that no other URL uses.

    Args:
        pagetitle: Candidate file name.
        url: URL the file name is meant for.
        comms: Mapping of URL -> commentary file name already assigned.

    Returns:
        *pagetitle* unchanged when it is unused or already belongs to *url*.
        Otherwise a "0" prefix is added (or an existing leading number is
        incremented) until the name no longer belongs to a different URL.
    """
    # Build the reverse lookup (file name -> URL) once.
    owners = {name: owner for owner, name in comms.items()}
    while owners.get(pagetitle, url) != url:
        prefix = re.match(r"[0-9]+", pagetitle)
        if prefix is None:
            pagetitle = "0" + pagetitle
        else:
            # Bump only the leading number; digits elsewhere in the title
            # must be left untouched.
            pagetitle = str(int(prefix.group()) + 1) + pagetitle[prefix.end():]
    return pagetitle
def load_commantaries():
    """Load the URL -> commentary file name index, creating it if missing.

    The index lives in ``~/w3m_commentary/commentaries.json``.  The directory
    is created when absent, and an empty index (``{}``) is written when the
    JSON file does not exist yet.

    Returns:
        The decoded JSON content (an empty dict for a fresh index).
    """
    dirpath = os.path.expanduser("~/w3m_commentary/")
    path = dirpath + "commentaries.json"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dirpath, exist_ok=True)
    try:
        with open(path, encoding="utf-8", errors="ignore") as f:
            return json.load(f)
    except FileNotFoundError:
        # First run: persist an empty index so later runs find the file.
        with open(path, "w") as f:
            f.write("{}")
        return {}
# Script body: open (creating it on first use) the commentary file that
# belongs to the page named by the W3M_URL environment variable.
# The W3M_* variables are read straight from this process's environment,
# which is the same data a `printenv` child process would print.
w3m_url = os.environ["W3M_URL"]
comms = load_commantaries()
commentary_dir = os.path.expanduser("~/w3m_commentary/")
if w3m_url not in comms:
    # First visit to this URL: create its commentary file and register it.
    w3m_sourcefile = os.environ["W3M_SOURCEFILE"]
    w3m_charset = os.environ["W3M_CHARSET"]
    srcfile = read_srcfile(w3m_sourcefile, encoding=w3m_charset)
    pagetitle = gettitle(srcfile)  # parse the document only once
    commname = maketitle(sub_for_more_filenameness(pagetitle), w3m_url, comms)
    print(commname)
    # Header of a new commentary file: title, underline, URL, text length.
    with open(os.path.join(commentary_dir, commname), "w") as particular_comm:
        particular_comm.write(pagetitle + "\n====\n" + w3m_url + "\nlength: " + str(getlength(w3m_url)))
    comms[w3m_url] = commname
    with open(os.path.join(commentary_dir, "commentaries.json"), "w") as f:
        json.dump(comms, f)
# Launch the editor with an argument list rather than a shell string, so
# quotes, "$" or backticks in the file name are not interpreted by a shell.
subprocess.call(["vim", os.path.join(commentary_dir, comms[w3m_url])])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment