chenghongyao/cubox-termux-obsidian.py

## cubox-termux-obsidian.py
#!/bin/python

import os
import json
import sys
import time
import re
import requests


# Timestamp taken once at start-up (local time, YYYYmmddHHMMSS).
stamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
# Title shown by the handler-selection dialog.
title = "文件处理"
# Registry of file handlers: display name -> function.
options = dict()


# 1. Directory the processed notes are saved to
ob_dst = "/data/data/com.termux/files/home/storage/shared/Documents/obnote/K 快收/"
# 2. Whether images are downloaded to local storage (True/False)
download_img = True
# 3. Directory downloaded images are saved to
ob_img_dst = f"{ob_dst}assets/"

# 4. Tags offered for selection
tags = [
    '技术',
    '社评',
    '信息',
    '生活',
    '知识',
    '其他',
]
# 5. Source host -> display name (only markdown exported by Cubox is supported)
host_map = {
    "www.zhihu.com": "知乎",
    "mp.weixin.qq.com": "微信",
}

# 6. Custom YAML front matter
def get_meta(fro, md):
    """Build the front-matter mapping for a note.

    Args:
        fro: Source label of the note; omitted from the result when falsy.
        md: Markdown text of the note (accepted but not used here).

    Returns:
        dict with "created" (today's date) and "uid" (the start-up stamp),
        plus "from" when *fro* is set and "tags" (comma-joined) when the
        tag dialog returned a selection.
    """
    today = time.strftime("%Y-%m-%d")
    # Ask the user which tags apply to this note.
    chosen = show_checkbox_dialog("选择标签", tags)

    front_matter = {"created": today, "uid": stamp}
    if fro:
        front_matter.update({"from": fro})
    if chosen:
        front_matter.update(tags=','.join(chosen))
    return front_matter

# Register a file-processing function
def register(name):
    """Return a decorator that stores the decorated function in ``options``.

    Args:
        name: Display name under which the function is registered.

    Returns:
        A decorator.  It returns the function it received, so the decorated
        name stays bound to the callable instead of being rebound to None.
    """
    def _d(fun):
        options[name] = fun
        return fun
    return _d

# Show a single-choice (radio) dialog
def show_radio_dialog(title, options):
    """Let the user pick one of *options* through ``termux-dialog radio``.

    Args:
        title: Dialog title.
        options: Iterable of option labels; they are joined with "," for
            the ``-v`` argument.

    Returns:
        The "text" field of the dialog result, or None when the reported
        "code" is not -1 or the result carries no "index".

    Raises:
        FileNotFoundError: ``termux-dialog`` is not on PATH.
        json.JSONDecodeError: the command did not print valid JSON.
    """
    import subprocess  # local import: the file's import block does not provide it

    # An argument list (no shell) keeps quotes, "$" or backticks in the title
    # or labels from breaking or altering the command line.
    proc = subprocess.run(
        ["termux-dialog", "radio", "-t", title, "-v", ",".join(options)],
        stdout=subprocess.PIPE,
        encoding="utf-8",
    )
    res = json.loads(proc.stdout)
    if res['code'] != -1 or res.get('index') is None:
        return None
    return res['text']

def show_checkbox_dialog(title, options):
    """Let the user tick any of *options* through ``termux-dialog checkbox``.

    Args:
        title: Dialog title.
        options: Iterable of option labels; they are joined with "," for
            the ``-v`` argument.

    Returns:
        A list with the "text" of every entry in the result's "values", or
        None when the reported "code" is not -1 or the result "text" is
        missing or "[]" (nothing ticked).

    Raises:
        FileNotFoundError: ``termux-dialog`` is not on PATH.
        json.JSONDecodeError: the command did not print valid JSON.
    """
    import subprocess  # local import: the file's import block does not provide it

    # An argument list (no shell) keeps quotes, "$" or backticks in the title
    # or labels from breaking or altering the command line.
    proc = subprocess.run(
        ["termux-dialog", "checkbox", "-t", title, "-v", ",".join(options)],
        stdout=subprocess.PIPE,
        encoding="utf-8",
    )
    res = json.loads(proc.stdout)
    if res['code'] != -1 or res.get('text', '[]') == '[]':
        return None
    return [x['text'] for x in res['values']]


# Light markdown clean-up
def md_pre_common(md):
    """Tidy blank lines in *md* and return the result.

    Runs of three or more newlines collapse to exactly two; afterwards every
    sequence "newline, single dash, two newlines" is replaced by one newline.
    """
    rules = (
        (r"\n{3,}", "\n\n"),
        (r"\n-\n\n", "\n"),
    )
    for pattern, replacement in rules:
        md = re.sub(pattern, replacement, md)
    return md

# Characters stripped from note file names (path separators and other
# characters that are illegal in file names on common file systems).
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')
# Markdown image: ![alt](target).  Non-greedy groups keep several images on
# one line from being swallowed into a single match.
_IMAGE_LINK = re.compile(r"(!\[(.*?)\]\((.*?)\))")
# Source marker expected on the last line of a Cubox export.
_SOURCE_MARKER = re.compile(r"\[查看原网页: (.*?)\]")
# Seconds to wait for one image download before giving up on it.
_IMAGE_TIMEOUT = 30


def _note_filename(md, src):
    """Return the note file name: first-line "# " heading + ".md", else the base name of *src*."""
    first_line = md.split("\n", 1)[0].rstrip("\r")
    if first_line.startswith("# "):
        heading = _ILLEGAL_FILENAME_CHARS.sub("", first_line[2:]).strip()
        if heading:
            return heading + ".md"
    return _ILLEGAL_FILENAME_CHARS.sub("", os.path.basename(src))


def _note_source(md):
    """Return the source named on the last non-blank line (mapped through ``host_map``), or None."""
    # rstrip first: a trailing newline would otherwise make the "last line" empty.
    last_line = md.rstrip().rsplit("\n", 1)[-1]
    found = _SOURCE_MARKER.search(last_line)
    if found is None:
        return None
    host = found.group(1)
    return host_map.get(host, host)


def _localize_images(md):
    """Download the images referenced in *md* and rewrite their links to ``![[name]]`` embeds."""
    # dict.fromkeys drops repeated references while keeping first-seen order.
    links = list(dict.fromkeys(_IMAGE_LINK.findall(md)))
    if not links:
        return md

    os.makedirs(ob_img_dst, exist_ok=True)
    link_count = len(links)

    # TODO: download in parallel
    for i, (raw, _alt, target) in enumerate(links):
        print("%d/%d:" % (i + 1, link_count), end="")
        # The target may carry an optional title: (url "title").
        href = target.split(None, 1)[0] if target.strip() else ""
        try:
            res = requests.get(href, timeout=_IMAGE_TIMEOUT)
        except requests.RequestException as err:
            # One unreachable image must not abort the whole note; the
            # original remote link stays in place.
            print("download failed: %s" % err)
            continue

        # Drop parameters such as "; charset=..." before looking at the type.
        mime = res.headers.get('Content-Type', '').split(";", 1)[0].strip().lower()
        if not res.ok or not mime.startswith("image/"):
            print("skipped")
            continue

        # "image/svg+xml" -> "svg"
        ext = mime[len("image/"):].split("+", 1)[0]
        name = "%s%03d.%s" % (stamp, i, ext)
        print(name)
        with open(os.path.join(ob_img_dst, name), 'wb') as f:
            f.write(res.content)
        md = md.replace(raw, f'![[{name}]]')
    return md


@register("ob快收")
def move_to_obsidian(src):
    """Convert the markdown file *src* into a note under ``ob_dst`` and delete *src*.

    Steps: read the file, derive the note file name and source, clean up
    the markdown, prepend the YAML front matter built by ``get_meta``,
    optionally download referenced images (``download_img``), write the
    note as UTF-8 and remove the source file.

    Args:
        src: Path of the markdown file to process.

    Raises:
        OSError: reading *src*, writing the note or removing *src* failed.
    """
    print(src)

    # Read the source file
    with open(src, 'r', encoding='utf8') as f:
        md = f.read()

    # File name from the first-line heading, source from the last line
    filename = _note_filename(md, src)
    fro = _note_source(md)

    # Markdown clean-up
    md = md_pre_common(md)

    # Build the YAML front matter and put it in front of the note
    meta = get_meta(fro, md)
    meta_str = "---\n" + "\n".join(f"{k}: {v}" for k, v in meta.items()) + "\n---\n"
    md = meta_str + md

    # Download images
    if download_img:
        md = _localize_images(md)

    # Save the note; written as UTF-8 to match how the source was read
    os.makedirs(ob_dst, exist_ok=True)
    with open(os.path.join(ob_dst, filename), 'w', encoding='utf8') as f:
        f.write(md)

    # Remove the source file
    os.remove(src)


def main():
    """Run a registered handler on the file named by the first command-line argument.

    With several registered handlers the user picks one through a radio
    dialog; a single handler is used directly.  Returns without doing
    anything when no path was given, when nothing is registered, or when
    the dialog yields no choice.
    """
    if len(sys.argv) < 2:
        print("need file path")
        return

    keys = list(options)
    if not keys:
        # Nothing registered: report it instead of failing on keys[0].
        print("no file handler registered")
        return

    if len(keys) > 1:
        res = show_radio_dialog(title, keys)
        if res is None:
            return
        fun = options[res]
    else:
        fun = options[keys[0]]

    fun(sys.argv[1])


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
	#!/bin/python

	import os
	import json
	import sys
	import time
	import re
	import requests


	# Timestamp taken once at start-up (local time, YYYYmmddHHMMSS).
	stamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
	# Title shown by the handler-selection dialog.
	title = "文件处理"
	# Registry of file handlers: display name -> function.
	options = dict()


	# 1. Directory the processed notes are saved to
	ob_dst = "/data/data/com.termux/files/home/storage/shared/Documents/obnote/K 快收/"
	# 2. Whether images are downloaded to local storage (True/False)
	download_img = True
	# 3. Directory downloaded images are saved to
	ob_img_dst = f"{ob_dst}assets/"

	# 4. Tags offered for selection
	tags = [
	    '技术',
	    '社评',
	    '信息',
	    '生活',
	    '知识',
	    '其他',
	]
	# 5. Source host -> display name (only markdown exported by Cubox is supported)
	host_map = {
	    "www.zhihu.com": "知乎",
	    "mp.weixin.qq.com": "微信",
	}

	# 6. Custom YAML front matter
	def get_meta(fro, md):
	    """Build the front-matter mapping for a note.

	    Args:
	        fro: Source label of the note; omitted from the result when falsy.
	        md: Markdown text of the note (accepted but not used here).

	    Returns:
	        dict with "created" (today's date) and "uid" (the start-up stamp),
	        plus "from" when *fro* is set and "tags" (comma-joined) when the
	        tag dialog returned a selection.
	    """
	    created = time.strftime("%Y-%m-%d")
	    # Ask the user which tags apply to this note.
	    ts = show_checkbox_dialog("选择标签", tags)

	    meta = {
	        "created": created,
	        "uid": stamp,
	    }
	    if fro:
	        meta["from"] = fro

	    if ts:
	        meta["tags"] = ','.join(ts)

	    return meta

	# Register a file-processing function
	def register(name):
	    """Return a decorator that stores the decorated function in ``options``.

	    Args:
	        name: Display name under which the function is registered.

	    Returns:
	        A decorator.  It returns the function it received, so the decorated
	        name stays bound to the callable instead of being rebound to None.
	    """
	    def _d(fun):
	        options[name] = fun
	        return fun
	    return _d

	# Show a single-choice (radio) dialog
	def show_radio_dialog(title, options):
	    """Let the user pick one of *options* through ``termux-dialog radio``.

	    Args:
	        title: Dialog title.
	        options: Iterable of option labels; they are joined with "," for
	            the ``-v`` argument.

	    Returns:
	        The "text" field of the dialog result, or None when the reported
	        "code" is not -1 or the result carries no "index".

	    Raises:
	        FileNotFoundError: ``termux-dialog`` is not on PATH.
	        json.JSONDecodeError: the command did not print valid JSON.
	    """
	    import subprocess  # local import: the file's import block does not provide it

	    # An argument list (no shell) keeps quotes, "$" or backticks in the title
	    # or labels from breaking or altering the command line.
	    proc = subprocess.run(
	        ["termux-dialog", "radio", "-t", title, "-v", ",".join(options)],
	        stdout=subprocess.PIPE,
	        encoding="utf-8",
	    )
	    res = json.loads(proc.stdout)
	    if res['code'] != -1 or res.get('index') is None:
	        return None
	    return res['text']

	def show_checkbox_dialog(title, options):
	    """Let the user tick any of *options* through ``termux-dialog checkbox``.

	    Args:
	        title: Dialog title.
	        options: Iterable of option labels; they are joined with "," for
	            the ``-v`` argument.

	    Returns:
	        A list with the "text" of every entry in the result's "values", or
	        None when the reported "code" is not -1 or the result "text" is
	        missing or "[]" (nothing ticked).

	    Raises:
	        FileNotFoundError: ``termux-dialog`` is not on PATH.
	        json.JSONDecodeError: the command did not print valid JSON.
	    """
	    import subprocess  # local import: the file's import block does not provide it

	    # An argument list (no shell) keeps quotes, "$" or backticks in the title
	    # or labels from breaking or altering the command line.
	    proc = subprocess.run(
	        ["termux-dialog", "checkbox", "-t", title, "-v", ",".join(options)],
	        stdout=subprocess.PIPE,
	        encoding="utf-8",
	    )
	    res = json.loads(proc.stdout)
	    if res['code'] != -1 or res.get('text', '[]') == '[]':
	        return None
	    return [x['text'] for x in res['values']]


	# Light markdown clean-up
	def md_pre_common(md):
	    """Tidy blank lines in *md* and return the result.

	    Runs of three or more newlines collapse to exactly two; afterwards every
	    sequence "newline, single dash, two newlines" is replaced by one newline.
	    """
	    md = re.sub(r"\n{3,}", "\n\n", md)
	    md = re.sub(r"\n-\n\n", "\n", md)
	    return md

	# Characters stripped from note file names (path separators and other
	# characters that are illegal in file names on common file systems).
	_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')
	# Markdown image: ![alt](target).  Non-greedy groups keep several images on
	# one line from being swallowed into a single match.
	_IMAGE_LINK = re.compile(r"(!\[(.*?)\]\((.*?)\))")
	# Source marker expected on the last line of a Cubox export.
	_SOURCE_MARKER = re.compile(r"\[查看原网页: (.*?)\]")
	# Seconds to wait for one image download before giving up on it.
	_IMAGE_TIMEOUT = 30


	def _note_filename(md, src):
	    """Return the note file name: first-line "# " heading + ".md", else the base name of *src*."""
	    first_line = md.split("\n", 1)[0].rstrip("\r")
	    if first_line.startswith("# "):
	        heading = _ILLEGAL_FILENAME_CHARS.sub("", first_line[2:]).strip()
	        if heading:
	            return heading + ".md"
	    return _ILLEGAL_FILENAME_CHARS.sub("", os.path.basename(src))


	def _note_source(md):
	    """Return the source named on the last non-blank line (mapped through ``host_map``), or None."""
	    # rstrip first: a trailing newline would otherwise make the "last line" empty.
	    last_line = md.rstrip().rsplit("\n", 1)[-1]
	    found = _SOURCE_MARKER.search(last_line)
	    if found is None:
	        return None
	    host = found.group(1)
	    return host_map.get(host, host)


	def _localize_images(md):
	    """Download the images referenced in *md* and rewrite their links to ``![[name]]`` embeds."""
	    # dict.fromkeys drops repeated references while keeping first-seen order.
	    links = list(dict.fromkeys(_IMAGE_LINK.findall(md)))
	    if not links:
	        return md

	    os.makedirs(ob_img_dst, exist_ok=True)
	    link_count = len(links)

	    # TODO: download in parallel
	    for i, (raw, _alt, target) in enumerate(links):
	        print("%d/%d:" % (i + 1, link_count), end="")
	        # The target may carry an optional title: (url "title").
	        href = target.split(None, 1)[0] if target.strip() else ""
	        try:
	            res = requests.get(href, timeout=_IMAGE_TIMEOUT)
	        except requests.RequestException as err:
	            # One unreachable image must not abort the whole note; the
	            # original remote link stays in place.
	            print("download failed: %s" % err)
	            continue

	        # Drop parameters such as "; charset=..." before looking at the type.
	        mime = res.headers.get('Content-Type', '').split(";", 1)[0].strip().lower()
	        if not res.ok or not mime.startswith("image/"):
	            print("skipped")
	            continue

	        # "image/svg+xml" -> "svg"
	        ext = mime[len("image/"):].split("+", 1)[0]
	        name = "%s%03d.%s" % (stamp, i, ext)
	        print(name)
	        with open(os.path.join(ob_img_dst, name), 'wb') as f:
	            f.write(res.content)
	        md = md.replace(raw, f'![[{name}]]')
	    return md


	@register("ob快收")
	def move_to_obsidian(src):
	    """Convert the markdown file *src* into a note under ``ob_dst`` and delete *src*.

	    Steps: read the file, derive the note file name and source, clean up
	    the markdown, prepend the YAML front matter built by ``get_meta``,
	    optionally download referenced images (``download_img``), write the
	    note as UTF-8 and remove the source file.

	    Args:
	        src: Path of the markdown file to process.

	    Raises:
	        OSError: reading *src*, writing the note or removing *src* failed.
	    """
	    print(src)

	    # Read the source file
	    with open(src, 'r', encoding='utf8') as f:
	        md = f.read()

	    # File name from the first-line heading, source from the last line
	    filename = _note_filename(md, src)
	    fro = _note_source(md)

	    # Markdown clean-up
	    md = md_pre_common(md)

	    # Build the YAML front matter and put it in front of the note
	    meta = get_meta(fro, md)
	    meta_str = "---\n" + "\n".join(f"{k}: {v}" for k, v in meta.items()) + "\n---\n"
	    md = meta_str + md

	    # Download images
	    if download_img:
	        md = _localize_images(md)

	    # Save the note; written as UTF-8 to match how the source was read
	    os.makedirs(ob_dst, exist_ok=True)
	    with open(os.path.join(ob_dst, filename), 'w', encoding='utf8') as f:
	        f.write(md)

	    # Remove the source file
	    os.remove(src)



	def main():
	    """Run a registered handler on the file named by the first command-line argument.

	    With several registered handlers the user picks one through a radio
	    dialog; a single handler is used directly.  Returns without doing
	    anything when no path was given, when nothing is registered, or when
	    the dialog yields no choice.
	    """
	    if len(sys.argv) < 2:
	        print("need file path")
	        return

	    keys = list(options)
	    if not keys:
	        # Nothing registered: report it instead of failing on keys[0].
	        print("no file handler registered")
	        return

	    if len(keys) > 1:
	        res = show_radio_dialog(title, keys)
	        if res is None:
	            return
	        fun = options[res]
	    else:
	        fun = options[keys[0]]

	    fun(sys.argv[1])


	# Only run when executed as a script, so importing the module has no side effects.
	if __name__ == "__main__":
	    main()