Created
October 12, 2022 09:30
-
-
Save Lightblues/76467909fb70d14a8f5135e91774c261 to your computer and use it in GitHub Desktop.
管理Markdown的图片文件: 在md文件被移动之后, 从索引目录中将引用的图片移动到相应目录下.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" @220404 | |
移动文档中引用的图片到对应的文件夹下. 保证文中所有引用的图片都是在本地的 media 目录下. | |
使用: `python move_media.py <md_path1> ...` 其中path可以是相对路径 | |
使用场景: 1) 在一份文档中编写好内容后, 将其中部分转移到新的文档中; 2) 文档修改了名字. | |
TODO: | |
- 下载网络图片 | |
- 清理功能. | |
""" | |
import collections
import os
import re
import shutil
import subprocess
import sys
# Matches a Markdown image reference ``![alt](target)``.
# Group 1 is the alt text, group 2 is the link target.
pattern_media = re.compile(r"\!\[(.*?)\]\((.*?)\)")

# Index of known images: file name -> path of that file on disk.
mname2path = {}


def get_media_path_map(root: str, is_media_dir: bool = False) -> None:
    """Recursively index the images stored in ``media`` directories under *root*.

    Results are written into the module-level ``mname2path`` dict, keyed by
    file name. Inside a ``media`` directory both files placed directly in it
    and files one level down (``media/<subdir>/<file>``) are recorded.

    Args:
        root: Directory to scan.
        is_media_dir: True when *root* itself is a ``media`` directory whose
            contents should be recorded.

    Entries whose name starts with ``.`` are ignored everywhere, including
    hidden directories such as ``.git``.
    """
    if is_media_dir:
        for name in os.listdir(root):
            if name.startswith("."):
                continue
            entry = f"{root}/{name}"
            if os.path.isfile(entry):
                mname2path[name] = entry
            elif os.path.isdir(entry):
                for picname in os.listdir(entry):
                    if picname.startswith("."):
                        continue
                    # File names are not required to be unique (duplicate
                    # images exist); the entry seen last wins.
                    mname2path[picname] = f"{entry}/{picname}"

    # Descend into sub-directories, also below a media directory, looking
    # for further ``media`` directories.
    for name in os.listdir(root):
        # Test the entry name, not the joined path: the joined path never
        # starts with "." for an absolute root, and always does for root ".".
        if name.startswith("."):
            continue
        p = f"{root}/{name}"
        if not os.path.isdir(p):
            continue
        get_media_path_map(p, name == "media")
def move_media(path: str):
    """Move the images referenced by a Markdown file next to that file.

    Every image reference in the document is rewritten to
    ``media/<doc name>/<image file name>`` (relative to the document). When
    that target does not exist yet, the image is looked up by file name in
    the module-level ``mname2path`` index and moved there, so the index must
    have been filled (see ``get_media_path_map``) before calling this.

    Args:
        path: Path of the ``.md`` file; may be relative to the current
            working directory.

    Raises:
        ValueError: If *path* does not end with ``.md``.

    References that cannot be resolved (remote URLs, empty targets, images
    missing from the index) are left untouched and reported on stdout.
    """
    # Resolve first: for a bare file name os.path.split() yields an empty
    # directory, and "" + "/media/..." would point at the filesystem root.
    doc_dir, fname = os.path.split(os.path.abspath(path))
    if not fname.endswith(".md"):
        raise ValueError(f"not a Markdown (.md) file: {path}")
    name = fname[:-3]
    md_path = os.path.join(doc_dir, fname)
    os.makedirs(os.path.join(doc_dir, "media", name), exist_ok=True)

    with open(md_path, "r", encoding="utf-8") as f:
        md = f.read()

    def replace_func(r):
        """``re.sub`` callback: relocate one image and return its new reference."""
        desc, target = r.group(1), r.group(2)
        picname = os.path.split(target)[-1]
        # Downloading remote images is not implemented; keep such links as is.
        if not picname or "://" in target:
            return r.group(0)
        newpath = f"media/{name}/{picname}"
        dest = os.path.join(doc_dir, newpath)
        if not os.path.exists(dest):
            oldpath = mname2path.get(picname)
            if oldpath is None or not os.path.exists(oldpath):
                print(f"skipped (not found): {target}")
                return r.group(0)
            print(f"moved: {picname}")
            # Move rather than copy: each image is meant to be referenced by
            # exactly one document.
            shutil.move(oldpath, dest)
            # Keep the index in sync for documents processed later.
            mname2path[picname] = dest
        return f"![{desc}]({newpath})"

    md_new = pattern_media.sub(replace_func, md)
    if md_new != md:
        # Write with the same encoding the file was read with.
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(md_new)
def test_media_suffix():
    """Print how many images are indexed and a tally of their file extensions.

    Reads the module-level ``mname2path`` index. On the author's data the
    result was ``Counter({'.jpg': 267, '.png': 210})``.
    """
    print("#pic:", len(mname2path))
    suffix_counts = collections.Counter(
        os.path.splitext(name)[1] for name in mname2path
    )
    # The tally used to be computed and silently discarded; show it.
    print(suffix_counts)
def test_re():
    """Manual check of ``pattern_media``: rewrite every image link in a sample.

    Prints the sample text with ``.new`` appended to each image target.
    """
    sample = """
![](./media/1.jpg)
balabala
abc ![](./media/2.jpg) balabala
"""

    # re.search would only report the first reference; substituting with a
    # callback visits all of them.
    def append_new_suffix(match):
        alt_text, target = match.groups()
        return f"![{alt_text}]({target}.new)"

    print(pattern_media.sub(append_new_suffix, sample))
if __name__ == "__main__":
    # Validate the command line before doing the (potentially slow) scan.
    if len(sys.argv) < 2:
        print("Usage: python move_media.py <path>")
        # sys.exit() instead of the interactive-only ``exit`` helper, which
        # is missing when Python runs without the ``site`` module.
        sys.exit(1)

    # Index root: the directory that contains this script. (An earlier
    # variant used the parent of that directory instead.)
    root = os.path.dirname(os.path.abspath(__file__))
    print(f"root: {root}")
    get_media_path_map(root)

    for path in sys.argv[1:]:
        move_media(path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment