#!/usr/bin/env python3 | |
# vim: sw=4 ts=4 et tw=100 cc=+1 | |
# | |
#################################################################################################### | |
# DESCRIPTION # | |
#################################################################################################### | |
# | |
# Decompressor/compressor for files in Mozilla's "mozLz4" format. Firefox uses this file format to | |
# compress e. g. bookmark backups (*.jsonlz4). | |
# | |
# This file format is in fact just plain LZ4 data with a custom header (magic number [8 bytes] and | |
# uncompressed file size [4 bytes, little endian]). | |
# | |
#################################################################################################### | |
# DEPENDENCIES # | |
#################################################################################################### | |
# | |
# - Tested with Python 3.10 | |
# - LZ4 bindings for Python, version 4.x: https://pypi.python.org/pypi/lz4 | |
# | |
#################################################################################################### | |
# LICENSE # | |
#################################################################################################### | |
# | |
# Copyright (c) 2015-2022, Tilman Blumenbach | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without modification, are permitted | |
# provided that the following conditions are met: | |
# | |
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions | |
# and the following disclaimer. | |
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of | |
# conditions and the following disclaimer in the documentation and/or other materials provided | |
# with the distribution. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR | |
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | |
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER | |
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
import argparse | |
import sys | |
import lz4.block | |
class BinFileArg:
    """Argparse ``type=`` callable that converts a path argument into a binary file object.

    The special argument ``-`` selects the binary buffer of standard input (mode ``"r"``) or
    of standard output (mode ``"w"``) instead of opening a file on disk.
    """

    # Name of the ``sys`` attribute that stands in for `-', keyed by file mode. Only names are
    # stored so that the standard streams are looked up lazily: an ordinary file can then still
    # be opened when a standard stream is missing (e. g. ``sys.stdin is None``).
    _STD_STREAMS = {
        "r": "stdin",
        "w": "stdout",
    }

    def __init__(self, mode):
        """Store the file mode: ``"r"`` for reading or ``"w"`` for writing."""
        self._mode = mode

    def __call__(self, arg):
        """Return a binary file object for the command-line argument *arg*.

        Args:
            arg: Path of the file to open, or ``-`` for the matching standard stream.

        Returns:
            The ``buffer`` of ``sys.stdin``/``sys.stdout`` for ``-``, otherwise the file opened
            in binary mode. The caller is responsible for closing it.

        Raises:
            argparse.ArgumentTypeError: If opening the file fails with an ``OSError``.
            KeyError: If *arg* is ``-`` and the mode is neither ``"r"`` nor ``"w"``.
        """
        if arg == "-":
            return getattr(sys, self._STD_STREAMS[self._mode]).buffer

        try:
            return open(arg, self._mode + "b")
        except OSError as e:
            # Argparse reports ArgumentTypeError as a regular usage error; keep the original
            # OSError attached as the explicit cause.
            raise argparse.ArgumentTypeError(
                "failed to open file for %s: %s" % (
                    "reading" if self._mode == "r" else "writing",
                    e,
                )
            ) from e
def decompress(file_obj):
    """Decompress mozLz4 data read from the binary file object *file_obj*.

    The first 8 bytes must be the mozLz4 magic number; everything after it is passed to
    ``lz4.block.decompress()``.

    Returns:
        Whatever ``lz4.block.decompress()`` returns for the remaining data.

    Raises:
        ValueError: If the first 8 bytes read are not the magic number.
    """
    magic = file_obj.read(8)
    if magic != b"mozLz40\0":
        raise ValueError("Invalid magic number")

    # Everything after the magic number belongs to the LZ4 block decompressor.
    payload = file_obj.read()
    return lz4.block.decompress(payload)
def compress(file_obj):
    """Compress the whole content of the binary file object *file_obj* to mozLz4 data.

    Returns:
        The mozLz4 magic number followed by the output of ``lz4.block.compress()``.
    """
    raw = file_obj.read()
    return b"".join((b"mozLz40\0", lz4.block.compress(raw)))
def get_argparser():
    """Build and return the ``argparse.ArgumentParser`` for the command line of this script.

    The parser knows one flag (``-d``/``--decompress``/``--uncompress``) and two positional
    arguments: the mandatory input file and the optional output file, both converted to binary
    file objects by ``BinFileArg``.
    """
    parser = argparse.ArgumentParser(description="MozLz4a compression/decompression utility")

    # Mode switch: compression is the default, this flag selects decompression.
    parser.add_argument(
        "-d",
        "--decompress",
        "--uncompress",
        action="store_true",
        help="Decompress the input file instead of compressing it.",
    )

    parser.add_argument(
        "in_file",
        type=BinFileArg("r"),
        help="Path to input file. `-' means standard input.",
    )

    # The output file may be omitted; the default `-' is converted like a given argument.
    parser.add_argument(
        "out_file",
        type=BinFileArg("w"),
        nargs="?",
        default="-",
        help="Path to output file. `-' means standard output (and is the default).",
    )

    return parser
def main():
    """Run the command-line tool: read the input file, (de)compress it, write the result.

    Exits with status 4 if reading or (de)compressing the input fails and with status 5 if
    writing the output fails; in both cases a message is printed to standard error.
    """
    options = get_argparser().parse_args()

    # Step 1: read and convert the whole input. The input file is closed when the block ends.
    try:
        with options.in_file as stream:
            result = decompress(stream) if options.decompress else compress(stream)
    except Exception as err:
        message = "Could not compress/decompress file `%s': %s" % (options.in_file.name, err)
        print(message, file=sys.stderr)
        sys.exit(4)

    # Step 2: write the converted data. The output file is closed when the block ends.
    try:
        with options.out_file as stream:
            stream.write(result)
    except Exception as err:
        message = "Could not write to output file `%s': %s" % (options.out_file.name, err)
        print(message, file=sys.stderr)
        sys.exit(5)
# Script entry point: only run the tool when this file is executed directly.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
I have to agree, I’m pretty taken aback by this.
The effective reason is simple: They have no obligation to provide you a reason.
The real reason is then also simple: They want the vast majority of users, including fellow developers, to use a specific subset of search engines that suit their plans for your activity.
If you are trying to extract your search engines, it's very hard: even after decompressing, there is a ton of junk in the file (images). I found a web service here, but it cannot modify the engines. It looks like the most hostile format possible for open-source software.
Script updated:
- Now works with the latest 4.x release of the Python `lz4` package.
- Compression/decompression to/from stdout/stdin is now supported.
Can anyone explain how to use this script to add a search engine to the lz4 file, compress and use it in Firefox? Thanks!
I have all dependencies installed, including `pip` and `lz4`, yet the only thing I get after running `./mozlz4a.py search.json.mozlz4` is this:
@simurq I'd say the script is not enough for parsing the search engines.
Here is an HTML page under a free license you can save that does it for you (but also save the linked JS files):
FYI I have a tool that can process the search engines correctly: https://github.com/lilydjwg/mozlz4-tool
God, if I could upvote this a hundred times I would. Why would anyone design a NEW file format (jsonlz4) for something that we already have so many STANDARD, well-defined formats for? Why does Mozilla require end-users to download, compile, test different potential hacks to get their data out of a custom format? Just use a STANDARD format!