ayu-mushi/w3m_commentary.py

## w3m_commentary.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# printenvを使ってw3mでページ毎にコメントを記録できるようにする
# 今まで作ったURLと注釈ファイル名(タイトル、ダブったら数字を付ける)の組の列をjsonで保持 連想配列
# URLを渡すと注釈ファイルが開く
# keymap A EXEC_SHELL "w3m_commentary.py"

import lxml.html
import json
import os
import re
import subprocess

def readContent(path, encoding="utf-8"):
    """Return the whole text content of the file at *path*.

    Args:
        path: Path of the file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        The decoded file content. Bytes that cannot be decoded with
        *encoding* are dropped (``errors="ignore"``).
    """
    # The context manager closes the file even when read() raises.
    with open(path, encoding=encoding, errors="ignore") as f:
        return f.read()

def envvar(env, varname):
    """Extract the value of one variable from ``printenv``-style text.

    Args:
        env: Text consisting of ``NAME=value`` lines.
        varname: Exact name of the variable to look up.

    Returns:
        The rest of the first line that starts with ``varname=``.

    Raises:
        IndexError: If no line of *env* defines *varname*.
    """
    # Anchor at the start of a line and escape the name, so that e.g.
    # "W3M_URL" cannot match inside "OLD_W3M_URL=..." and regex
    # metacharacters in the name are taken literally.
    found = re.search("^" + re.escape(varname) + "=(.*)", env, re.MULTILINE)
    if found is None:
        raise IndexError("environment variable not found: " + varname)
    return found.group(1)

def read_srcfile(src_filename, encoding="utf-8"):
    """Read a page source file as text, decompressing it when gzip-compressed.

    Args:
        src_filename: Path of the source file; a name ending in ``.gz`` is
            opened through ``gzip``.
        encoding: Text encoding used to decode the content.

    Returns:
        The decoded content. Undecodable bytes are dropped.
    """
    # os.path.splitext always returns the extension with its leading dot,
    # so ".gz" is the only spelling that needs to be tested.
    if os.path.splitext(src_filename)[1] == ".gz":
        import gzip
        with gzip.open(src_filename, "rt", encoding=encoding, errors="ignore") as f:
            return f.read()
    return readContent(src_filename, encoding)

def gettitle(srcfile):
    """Return the text of the first ``<title>`` element of an HTML document.

    Args:
        srcfile: The HTML source as an already decoded ``str``.

    Returns:
        The title text, or ``""`` when the document has no ``<title>``
        element or the element has no text.
    """
    # The text is re-encoded as UTF-8 below, so the parser is told to read
    # UTF-8 explicitly; otherwise a <meta charset=...> declaration still
    # present in the markup could make it decode the bytes with the page's
    # original encoding and garble the title.
    parser = lxml.html.HTMLParser(encoding="utf-8")
    dom = lxml.html.fromstring(srcfile.encode("utf-8"), parser=parser)
    titles = dom.xpath("//title")
    if not titles or titles[0].text is None:
        return ""
    return titles[0].text

def getlength(w3m_url):
    """Return the number of characters in the ``w3m -dump`` output for a URL.

    Args:
        w3m_url: The URL handed to ``w3m -dump``.

    Returns:
        Length of the decoded dump text. Bytes that are not valid in the
        default encoding are dropped before counting.

    Raises:
        subprocess.CalledProcessError: If ``w3m`` exits with a non-zero status.
    """
    # Pass an argument list instead of a shell command line: URLs routinely
    # contain "&", ";" or "?" which a shell would interpret.
    dump = subprocess.check_output(["w3m", "-dump", w3m_url])
    return len(dump.decode(errors="ignore"))

# Replace characters that are problematic in a file name.
def sub_for_more_filenameness(pagetitle):
    """Turn a page title into a note file name.

    Every space, newline and slash in *pagetitle* becomes an underscore and
    the ``.md`` extension is appended.
    """
    unsafe_chars = re.compile(r'[\ \n/]')
    return unsafe_chars.sub("_", pagetitle) + ".md"

# When a different URL already uses the same title, add a numeric prefix.
# Needed, for example, when taking notes on tweets.
def maketitle(pagetitle, url, comms):
    """Return a note file name for *url* that no other URL already uses.

    Args:
        pagetitle: Candidate file name.
        url: URL the file name is meant for.
        comms: Mapping of URL to the file name already assigned to it.

    Returns:
        *pagetitle* itself when it is unused or already belongs to *url*.
        Otherwise the name with a numeric prefix: ``"0"`` is prepended first,
        and an existing leading number is incremented until the name is free.
    """
    owner_of = {name: page_url for page_url, name in comms.items()}
    candidate = pagetitle
    while candidate in owner_of and owner_of[candidate] != url:
        prefix = re.match("[0-9]+", candidate)
        if prefix is None:
            candidate = "0" + candidate
        else:
            # Replace only the leading number; digits elsewhere in the title
            # are part of the title and must be kept.
            candidate = str(int(prefix.group()) + 1) + candidate[prefix.end():]
    return candidate

def load_commantaries():
    """Load the index of note files, creating it on first use.

    The index is the JSON file ``~/w3m_commentary/commentaries.json``. The
    directory is created when missing, and a missing index file is
    initialised with an empty JSON object.

    Returns:
        The parsed JSON content of the index file, or ``{}`` when the file
        had to be created.
    """
    dirpath = os.path.expanduser("~/w3m_commentary/")
    path = dirpath + "commentaries.json"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dirpath, exist_ok=True)

    if os.path.exists(path):
        with open(path, encoding="utf-8", errors="ignore") as f:
            return json.load(f)

    with open(path, "w") as f:
        f.write("{}")
    return {}

# Script body: reads W3M_URL (and, for a page not seen before,
# W3M_SOURCEFILE and W3M_CHARSET) from the process environment, creates the
# note file for a new page, records it in the index, then opens the page's
# note file in vim.
w3m_url = os.environ["W3M_URL"]
comms = load_commantaries()
notes_dir = os.path.expanduser("~/w3m_commentary/")

if w3m_url not in comms:
    w3m_sourcefile = os.environ["W3M_SOURCEFILE"]
    w3m_charset = os.environ["W3M_CHARSET"]
    srcfile = read_srcfile(w3m_sourcefile, encoding=w3m_charset)
    pagetitle = gettitle(srcfile)  # parsed once, used for name and header
    commname = maketitle(sub_for_more_filenameness(pagetitle), w3m_url, comms)
    print(commname)

    # Build the header before opening the file, so that a failing
    # `w3m -dump` does not leave an empty note file behind.
    header = pagetitle + "\n====\n" + w3m_url + "\nlength: " + str(getlength(w3m_url))
    with open(os.path.join(notes_dir, commname), "w") as particular_comm:
        particular_comm.write(header)

    comms[w3m_url] = commname
    with open(os.path.join(notes_dir, "commentaries.json"), "w") as f:
        json.dump(comms, f)

# The file name is derived from the page title, i.e. from untrusted text, so
# vim is started with an argument list rather than through a shell command.
subprocess.call(["vim", os.path.join(notes_dir, comms[w3m_url])])
	#!/usr/bin/env python3
	# -- coding: utf-8 --

	# printenvを使ってw3mでページ毎にコメントを記録できるようにする
	# 今まで作ったURLと注釈ファイル名(タイトル、ダブったら数字を付ける)の組の列をjsonで保持連想配列
	# URLを渡すと注釈ファイルが開く
	# keymap A EXEC_SHELL "w3m_commentary.py"

	import lxml.html
	import json
	import os
	import re
	import subprocess

	def readContent(path, encoding="utf-8"):
	    """Return the whole text content of the file at *path*.

	    Args:
	        path: Path of the file to read.
	        encoding: Text encoding used to decode the file.

	    Returns:
	        The decoded file content. Bytes that cannot be decoded with
	        *encoding* are dropped (``errors="ignore"``).
	    """
	    # The context manager closes the file even when read() raises.
	    with open(path, encoding=encoding, errors="ignore") as f:
	        return f.read()

	def envvar(env, varname):
	    """Extract the value of one variable from ``printenv``-style text.

	    Args:
	        env: Text consisting of ``NAME=value`` lines.
	        varname: Exact name of the variable to look up.

	    Returns:
	        The rest of the first line that starts with ``varname=``.

	    Raises:
	        IndexError: If no line of *env* defines *varname*.
	    """
	    # Anchor at the start of a line and escape the name, so that e.g.
	    # "W3M_URL" cannot match inside "OLD_W3M_URL=..." and regex
	    # metacharacters in the name are taken literally.
	    found = re.search("^" + re.escape(varname) + "=(.*)", env, re.MULTILINE)
	    if found is None:
	        raise IndexError("environment variable not found: " + varname)
	    return found.group(1)

	def read_srcfile(src_filename, encoding="utf-8"):
	    """Read a page source file as text, decompressing it when gzip-compressed.

	    Args:
	        src_filename: Path of the source file; a name ending in ``.gz`` is
	            opened through ``gzip``.
	        encoding: Text encoding used to decode the content.

	    Returns:
	        The decoded content. Undecodable bytes are dropped.
	    """
	    # os.path.splitext always returns the extension with its leading dot,
	    # so ".gz" is the only spelling that needs to be tested.
	    if os.path.splitext(src_filename)[1] == ".gz":
	        import gzip
	        with gzip.open(src_filename, "rt", encoding=encoding, errors="ignore") as f:
	            return f.read()
	    return readContent(src_filename, encoding)

	def gettitle(srcfile):
	    """Return the text of the first ``<title>`` element of an HTML document.

	    Args:
	        srcfile: The HTML source as an already decoded ``str``.

	    Returns:
	        The title text, or ``""`` when the document has no ``<title>``
	        element or the element has no text.
	    """
	    # The text is re-encoded as UTF-8 below, so the parser is told to read
	    # UTF-8 explicitly; otherwise a <meta charset=...> declaration still
	    # present in the markup could make it decode the bytes with the page's
	    # original encoding and garble the title.
	    parser = lxml.html.HTMLParser(encoding="utf-8")
	    dom = lxml.html.fromstring(srcfile.encode("utf-8"), parser=parser)
	    titles = dom.xpath("//title")
	    if not titles or titles[0].text is None:
	        return ""
	    return titles[0].text

	def getlength(w3m_url):
	    """Return the number of characters in the ``w3m -dump`` output for a URL.

	    Args:
	        w3m_url: The URL handed to ``w3m -dump``.

	    Returns:
	        Length of the decoded dump text. Bytes that are not valid in the
	        default encoding are dropped before counting.

	    Raises:
	        subprocess.CalledProcessError: If ``w3m`` exits with a non-zero status.
	    """
	    # Pass an argument list instead of a shell command line: URLs routinely
	    # contain "&", ";" or "?" which a shell would interpret.
	    dump = subprocess.check_output(["w3m", "-dump", w3m_url])
	    return len(dump.decode(errors="ignore"))

	# Replace characters that are problematic in a file name.
	def sub_for_more_filenameness(pagetitle):
	    """Turn a page title into a note file name.

	    Every space, newline and slash in *pagetitle* becomes an underscore and
	    the ``.md`` extension is appended.
	    """
	    t = re.sub(r'[\ \n/]', "_", pagetitle) + ".md"
	    return t

	# When a different URL already uses the same title, add a numeric prefix.
	# Needed, for example, when taking notes on tweets.
	def maketitle(pagetitle, url, comms):
	    """Return a note file name for *url* that no other URL already uses.

	    Args:
	        pagetitle: Candidate file name.
	        url: URL the file name is meant for.
	        comms: Mapping of URL to the file name already assigned to it.

	    Returns:
	        *pagetitle* itself when it is unused or already belongs to *url*.
	        Otherwise the name with a numeric prefix: ``"0"`` is prepended first,
	        and an existing leading number is incremented until the name is free.
	    """
	    owner_of = {name: page_url for page_url, name in comms.items()}
	    candidate = pagetitle
	    while candidate in owner_of and owner_of[candidate] != url:
	        prefix = re.match("[0-9]+", candidate)
	        if prefix is None:
	            candidate = "0" + candidate
	        else:
	            # Replace only the leading number; digits elsewhere in the title
	            # are part of the title and must be kept.
	            candidate = str(int(prefix.group()) + 1) + candidate[prefix.end():]
	    return candidate

	def load_commantaries():
	    """Load the index of note files, creating it on first use.

	    The index is the JSON file ``~/w3m_commentary/commentaries.json``. The
	    directory is created when missing, and a missing index file is
	    initialised with an empty JSON object.

	    Returns:
	        The parsed JSON content of the index file, or ``{}`` when the file
	        had to be created.
	    """
	    dirpath = os.path.expanduser("~/w3m_commentary/")
	    path = dirpath + "commentaries.json"
	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(dirpath, exist_ok=True)

	    if os.path.exists(path):
	        with open(path, encoding="utf-8", errors="ignore") as f:
	            return json.load(f)

	    with open(path, "w") as f:
	        f.write("{}")
	    return {}

	# Script body: reads W3M_URL (and, for a page not seen before,
	# W3M_SOURCEFILE and W3M_CHARSET) from the process environment, creates the
	# note file for a new page, records it in the index, then opens the page's
	# note file in vim.
	w3m_url = os.environ["W3M_URL"]
	comms = load_commantaries()
	notes_dir = os.path.expanduser("~/w3m_commentary/")

	if w3m_url not in comms:
	    w3m_sourcefile = os.environ["W3M_SOURCEFILE"]
	    w3m_charset = os.environ["W3M_CHARSET"]
	    srcfile = read_srcfile(w3m_sourcefile, encoding=w3m_charset)
	    pagetitle = gettitle(srcfile)  # parsed once, used for name and header
	    commname = maketitle(sub_for_more_filenameness(pagetitle), w3m_url, comms)
	    print(commname)

	    # Build the header before opening the file, so that a failing
	    # `w3m -dump` does not leave an empty note file behind.
	    header = pagetitle + "\n====\n" + w3m_url + "\nlength: " + str(getlength(w3m_url))
	    with open(os.path.join(notes_dir, commname), "w") as particular_comm:
	        particular_comm.write(header)

	    comms[w3m_url] = commname
	    with open(os.path.join(notes_dir, "commentaries.json"), "w") as f:
	        json.dump(comms, f)

	# The file name is derived from the page title, i.e. from untrusted text, so
	# vim is started with an argument list rather than through a shell command.
	subprocess.call(["vim", os.path.join(notes_dir, comms[w3m_url])])