
@ajorg
Last active May 17, 2024 01:02
Find corrupt pages on a reMarkable tablet

I wrote this script to generate a list of files and pages to send to reMarkable Support for recovery. It's a bit rough, but it does the job.

It works by attempting to convert each page to SVG, and then looking for any empty .svg files. Because there are two different page formats, versions 5 and 6, two different tools are used.
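The version check is just a comparison against the 33-byte header at the start of each .rm file. A minimal sketch (the `page_version` helper is my name for illustration; the header strings are the ones the script matches on):

```python
# Sketch: classify a reMarkable .rm page by its 33-byte header.
HEADER_5 = b"reMarkable .lines file, version=5"
HEADER_6 = b"reMarkable .lines file, version=6"

def page_version(path):
    """Return 5 or 6 for known .lines headers, or None if unrecognized."""
    with open(path, "rb") as rm:
        header = rm.read(33)
    if header == HEADER_6:
        return 6
    if header == HEADER_5:
        return 5
    return None
```

Anything that returns None here would not be converted by either tool.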

Instructions

Backup

Connect your reMarkable to your computer with a USB-C cable and confirm you can log in over SSH. Then copy all of your documents to your computer.

I did something like this:

rsync -aP root@10.11.99.1:.local/share/remarkable/xochitl/ xochitl-$(date +%Y%m%d)/

This can take a little while; I had about half a GB of data. Once the copy finishes you can disconnect your tablet, since we don't want to endanger it.

Setup

You'll need two tools for the conversions. For version 5 pages, I use rm2svg.py, and for version 6 pages I use rmc.

rmc

I like to use venv for this.

python -m venv ~/rmc
source ~/rmc/bin/activate
pip install rmc

rm2svg.py

Download this from https://raw.githubusercontent.com/chemag/maxio/master/rm_tools/rm2svg.py, then put it in your PATH and make it executable. Or edit the rm2svg.py command in the script to call it correctly.

Run

You'll need to be in the directory you copied all the files to. ls */*.rm should list a bunch of page files with long UUIDs for names.
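If you want to eyeball what the script will iterate over, here's a quick sketch that groups page files by document the same way the ls pattern does (`pages_by_document` is a hypothetical helper, not part of the script):

```python
from glob import glob
from os.path import split

def pages_by_document(pattern="*/*.rm"):
    """Group page UUIDs under their document UUID, like `ls */*.rm` shows."""
    docs = {}
    for path in glob(pattern):
        doc_uuid, page_fn = split(path)
        docs.setdefault(doc_uuid, []).append(page_fn[:-3])  # drop ".rm"
    return docs
```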

Then run python find_rm_corrupt.py. This will take a long time, because it tries to convert every page you have to SVG. If you have corrupted pages, you'll also see a bunch of scary errors from one or the other of the conversion tools, but the script should keep churning through the pages. At the end it prints a list of documents with their corrupted page numbers, plus a summary of how many documents and pages it failed to convert.

If something is set up wrong, the script might error out, or it might report pages as corrupt that aren't.
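That's because the corruption test is simply "no SVG, or an empty SVG" after a conversion attempt; a broken tool setup produces the same symptom as a genuinely corrupt page. Roughly:

```python
from os.path import exists, getsize

def looks_corrupt(svg_path):
    """A page is flagged when conversion produced no output, or an empty file."""
    return not exists(svg_path) or getsize(svg_path) == 0
```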

I didn't want to put a lot more time into it, but I'm happy to collaborate if anyone is interested.

# Author: Andrew Jorgensen
# SPDX-License-Identifier: MIT-0
from glob import glob
from json import load
from os.path import exists, getmtime, getsize, split
from subprocess import run

HEADER_5 = b"reMarkable .lines file, version=5"
HEADER_6 = b"reMarkable .lines file, version=6"

# Try to convert every page to SVG, skipping pages already converted.
corrupt = set()
for file in glob("*/*.rm"):
    svg = file + ".svg"
    if exists(svg) and getsize(svg) != 0:
        continue
    if not exists(svg) or getmtime(svg) < getmtime(file):
        print(file)
        with open(file, "rb") as rm:
            header = rm.read(33)
        if header == HEADER_6:
            run(("rmc", file, "-o", svg))
        elif header == HEADER_5:
            run(("rm2svg.py", "-i", file, "-o", svg))
    if not exists(svg) or getsize(svg) == 0:
        corrupt.add(file)

def parents(uuid):
    """Build the document's folder path from its .metadata files."""
    with open(uuid + ".metadata", "r") as metadata_f:
        metadata = load(metadata_f)
    name = metadata.get("visibleName")
    parent = metadata.get("parent", "")
    if parent == "":
        return name
    elif parent == "trash":
        return parent + "/" + name
    else:
        return parents(parent) + "/" + name

def find_page(file):
    """Look up a page's 1-based number in the document's .content file."""
    doc_uuid, page_fn = split(file)
    page_uuid = page_fn[:-3]  # strip the ".rm" extension
    with open(doc_uuid + ".content", "r") as content_f:
        content = load(content_f)
    if "cPages" in content:
        for number, p in enumerate(content["cPages"]["pages"]):
            if p["id"] == page_uuid:
                return number + 1
    elif "pages" in content:
        for number, p in enumerate(content["pages"]):
            if p == page_uuid:
                return number + 1

# Group the corrupt pages by document, then print a summary.
docs = {}
for file in sorted(corrupt):
    with open(file, "rb") as rm:
        header = rm.read(33)
    h = header.decode("ascii")
    doc = parents(split(file)[0])
    page = find_page(file)
    if doc not in docs:
        docs[doc] = {"path": doc, "pages": {page: {"header": h, "file": file}}}
    else:
        docs[doc]["pages"][page] = {"header": h, "file": file}

num_docs = len(docs)
num_pages = 0
for d in docs:
    num_pages += len(docs[d]["pages"])
    pages = " ".join(str(p) for p in sorted(docs[d]["pages"]))
    print(f"\n{d}\nPages: {pages}")
print(f"Documents: {num_docs}")
print(f"Pages: {num_pages}")