Skip to content

Instantly share code, notes, and snippets.

@Techcable
Created June 25, 2023 05:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Techcable/0021d9ff97a787d2083fe87e3cd11fe6 to your computer and use it in GitHub Desktop.
Save Techcable/0021d9ff97a787d2083fe87e3cd11fe6 to your computer and use it in GitHub Desktop.
Convert Firefox bookmarks json -> raindrop.io CSV
"""Converts firefox's JSON bookmarks format to the CSV format raindrop.io expects
See here for docs on raindrop: https://help.raindrop.io/import/#csv
"""
from __future__ import annotations
from typing import (
ClassVar, Iterable, NewType, Any, Iterator
)
from enum import Enum, nonmember as enum_nonmember
import dataclasses as dc
import json
import operator
import functools
import sys
import csv
from io import TextIOBase
from datetime import datetime
import click
from support.simple_serde import NameStyle, ParseError, parse_type
class EntryType(Enum):
    """Kind of node found in a Firefox bookmarks export.

    Each member's value is the exact MIME-style string the export uses to
    tag a node, so ``EntryType(raw_string)`` resolves a node's kind.
    """

    PLACE = "text/x-moz-place"
    PLACE_CONTAINER = "text/x-moz-place-container"
    # The misspelled name stays the canonical member so that existing
    # references (and ``.name``) keep working unchanged.
    PLACE_SEPERATOR = "text/x-moz-place-separator"
    # Correctly spelled alias: same value, therefore the very same member.
    # Aliases do not show up when iterating over the enum.
    PLACE_SEPARATOR = "text/x-moz-place-separator"
def parse_moz_datetime(tp: type, value: Any) -> datetime:
    """Convert an integer microsecond timestamp into a ``datetime``.

    ``tp`` is the requested target type and must be ``datetime`` (or a
    subclass of it).  The value is split into whole seconds and leftover
    microseconds; the seconds go through ``datetime.fromtimestamp`` without
    a ``tz`` argument, so the result is a naive datetime in local time.

    Raises ``ParseError`` when ``value`` is not an ``int``.
    """
    assert issubclass(tp, datetime), tp
    if isinstance(value, int):
        whole_seconds = value // 1000_000
        leftover_micros = value % 1000_000
        moment = datetime.fromtimestamp(whole_seconds)
        return moment.replace(microsecond=leftover_micros)
    raise ParseError(f"Value should be an integer timestamp: {value!r}")
@dc.dataclass(kw_only=True)
class NormalizedEntry:
    """A single node of a Firefox bookmarks JSON export.

    Instances are created by :meth:`parse`, which delegates to
    ``parse_type``.  The per-field ``metadata`` entries and the two
    underscore-prefixed class attributes look like configuration for that
    helper.  NOTE(review): ``parse_type`` is defined elsewhere; confirm how
    it reads them.

    Two entries compare equal, and hash alike, when their ``guid`` matches;
    no other field takes part in the comparison.
    """

    guid: str
    title: str
    # The custom parser splits the raw value on commas; absent -> empty list.
    tags: list[str] = dc.field(
        metadata={"parser": lambda _tp, val: val.split(',')},
        default_factory=list
    )
    # Both timestamps are converted by parse_moz_datetime.
    date_added: datetime = dc.field(metadata={"parser": parse_moz_datetime})
    last_modified: datetime = dc.field(metadata={"parser": parse_moz_datetime})
    # presumably read from the "type" key of the raw data -- verify
    entry_type: EntryType = dc.field(metadata={"name": "type"})
    # presumably read from the "root" key of the raw data -- verify
    root_id: str | None = dc.field(metadata={"name": "root"}, default=None)
    # Nested entries; excluded from repr() so large trees stay readable.
    children: list[NormalizedEntry] | None = dc.field(default=None, repr=False)
    uri: str | None = None
    keyword: str | None = None

    # presumably the raw keys parse_type should skip -- verify
    _ignored_fields: ClassVar[set[str]] = {"index", "id", "typeCode", "iconUri", "postData", "charset"}
    # convert from camel case -> snake case
    _name_styling: ClassVar = (NameStyle.CAMEL_CASE, NameStyle.SNAKE_CASE)

    @staticmethod
    def parse(data: Any) -> NormalizedEntry:
        """Build a ``NormalizedEntry`` from decoded JSON via ``parse_type``."""
        return parse_type(NormalizedEntry, data)

    def print(
        self, target: TextIOBase | None = None, *,
        level: int, child_limit: int | None = None
    ) -> None:
        """Write an indented outline of this entry and its descendants.

        Args:
            target: Stream to write to.  ``None`` (the default) means
                whatever ``sys.stdout`` is at call time, so redirection
                installed after this module was imported is honoured.
            level: Nesting depth of this entry; each level indents by two
                spaces.
            child_limit: Maximum number of children printed per entry, or
                ``None`` for no limit.
        """
        indent = ' ' * (2 * level)
        num_children = len(self.children) if self.children is not None else 0
        # Show the URI when there is one, otherwise the number of children.
        data = self.uri or str(num_children)
        # The builtin print() treats file=None as the current sys.stdout.
        print(f"{indent}* {self.title} -> {data[:30]}", file=target)
        if self.children is not None:
            for child in self.children[:child_limit]:
                child.print(target, level=level + 1, child_limit=child_limit)

    def __hash__(self):
        return hash(self.guid)

    def __eq__(self, other):
        if isinstance(other, NormalizedEntry):
            return self.guid == other.guid
        else:
            return NotImplemented
class RootChildType(Enum):
    """The four folders expected directly below the bookmarks root.

    Members are declared as ``(title, root_id)`` pairs.  Only ``title``
    becomes the enum value, so ``RootChildType("menu")`` looks a member up
    by folder title.
    """

    MENU = ("menu", "bookmarksMenuFolder")
    TOOLBAR = ("toolbar", "toolbarFolder")
    UNFILED = ("unfiled", "unfiledBookmarksFolder")
    MOBILE = ("mobile", "mobileFolder")

    title: str
    root_id: str

    def __new__(cls, title, root_id):
        member = object.__new__(cls)
        member.title = title
        member.root_id = root_id
        # Value lookup goes through the title alone, not the whole tuple.
        member._value_ = title
        return member

    @property
    def guid_prefix(self) -> str:
        """The title followed by three underscores."""
        return f"{self.title}___"

    @property
    def human_name(self):
        """Display name used for this folder."""
        if self is RootChildType.MENU:
            return "Bookmarks Menu"
        if self is RootChildType.TOOLBAR:
            return "Bookmarks Toolbar"
        if self is RootChildType.UNFILED:
            return "Other Bookmarks"
        if self is RootChildType.MOBILE:
            return "Mobile Bookmarks"
        # Every member is handled above.
        raise AssertionError
@dc.dataclass(frozen=True)
class EntryCsvMeta:
entry: NormalizedEntry
_: dc.KW_ONLY
human_name: str
parent: EntryCsvMeta | None
def __post_init__(self):
if ('/' in self.human_name and
self.entry.entry_type != EntryType.PLACE):
# HACK
object.__setattr__(
self, 'human_name',
self.human_name.replace('/', '<slash>')
)
def parents(self) -> Iterator[EntryCsvMeta]:
parent = self.parent
while parent is not None:
yield parent
parent = parent.parent
@functools.cached_property
def full_human_name(self) -> str:
parts = [self.human_name]
for parent in self.parents():
assert '/' not in parent.human_name, parent
parts.append(parent.human_name)
parts.reverse()
return '/'.join(parts)
def write_csv(root_node: NormalizedEntry, output_file: TextIOBase) -> None:
    """Write every bookmark below ``root_node`` to ``output_file`` as CSV.

    The columns are ``folder``, ``title``, ``url``, ``description``,
    ``tags`` and ``created``.  ``description`` is never supplied, so it is
    written empty.  Rows are sorted by the bookmark's full slash-joined
    path.

    Args:
        root_node: The root of the parsed bookmarks tree.  It must be a
            container with an empty title whose direct children are
            exactly the four ``RootChildType`` folders.
        output_file: Text stream that receives the header and the rows.

    Raises:
        AssertionError: If the tree does not have the expected shape.
        ValueError: If a direct child of the root carries a title that is
            not a ``RootChildType`` value.
    """
    assert root_node.guid.startswith("root___"), root_node.guid
    assert root_node.root_id == "placesRoot", root_node.root_id
    assert root_node.title == "", root_node.title
    assert root_node.children is not None
    assert root_node.entry_type == EntryType.PLACE_CONTAINER

    root_children: dict[RootChildType, NormalizedEntry] = {}
    resolved_meta: dict[NormalizedEntry, EntryCsvMeta] = {}

    # First level: the well-known folders, exported under their display name.
    for child in root_node.children:
        child_type: RootChildType = RootChildType(child.title)
        assert child_type not in root_children, (
            f"Duplicate types: {child.title!r}"
        )
        assert child.guid.startswith(child_type.guid_prefix), child.guid
        # Compare against the id expected for this folder type.  The check
        # used to compare child.root_id with itself and could never fail.
        assert child.root_id == child_type.root_id, child.root_id
        assert child.entry_type == EntryType.PLACE_CONTAINER
        assert child.children is not None
        root_children[child_type] = child
        resolved_meta[child] = EntryCsvMeta(
            entry=child,
            human_name=child_type.human_name,
            parent=None
        )
    assert set(root_children.keys()) == set(RootChildType), set(root_children.keys())
    assert len(root_children) == 4

    # Walk the rest of the tree with an explicit stack, giving every entry
    # a meta object that points at its parent's meta.
    stack: list[NormalizedEntry] = list(root_children.values())
    while stack:
        parent = stack.pop()
        resolved_parent = resolved_meta[parent]
        assert parent.children is not None
        for child in parent.children:
            # Entries compare by guid, so this also rejects duplicate guids.
            assert child not in resolved_meta
            resolved_meta[child] = EntryCsvMeta(
                entry=child,
                human_name=child.title,
                parent=resolved_parent
            )
            if child.children:
                stack.append(child)

    # Only plain bookmarks become rows; folders appear in the folder column.
    resolved_meta_sorted = sorted(
        (
            meta for meta in resolved_meta.values()
            if meta.entry.entry_type == EntryType.PLACE
        ),
        key=operator.attrgetter('full_human_name')
    )

    writer = csv.DictWriter(
        output_file,
        ('folder', 'title', 'url', 'description', 'tags', 'created'),
    )
    writer.writeheader()
    for meta in resolved_meta_sorted:
        assert meta.entry.entry_type == EntryType.PLACE
        assert meta.parent is not None, meta
        writer.writerow(dict(
            url=meta.entry.uri,
            folder=meta.parent.full_human_name,
            title=meta.entry.title,
            # description: <missing>
            tags=','.join(meta.entry.tags),
            created=meta.entry.date_added.isoformat()
        ))
@click.command('convert')
@click.argument('input_file', type=click.File())
@click.argument('output_file', type=click.File(mode='wt'))
@click.option('output_format', '--format', type=click.Choice(('text', 'csv')))
def convert(input_file, output_format, output_file):
raw_data = json.load(input_file)
result = NormalizedEntry.parse(raw_data)
match output_format:
case 'text':
result.print(level=0)
case 'csv':
write_csv(result, output_file)
case _:
raise AssertionError
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    convert()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment