# M-O-Z-G/iconize_search_engines.py

## iconize_search_engines.py
import json
import requests
import base64
import argparse
import sys
import os
from urllib.parse import urlparse
import lz4.block
from io import BytesIO
from PIL import Image

# Description:
# Many browsers and extensions handle the storage and processing of search engine lists
# differently, which can create challenges when attempting to consolidate them in
# a Gecko-based browser. An ideal and streamlined approach is to transfer only the search
# engine URL, name, and shortcut, as in most cases the relevant search engine icon
# is already available online if the engine itself is up-to-date. However, Gecko-based
# browsers do not support on-the-fly icon loading; instead, they store these icons within
# their own database.
#
# To address this issue, this script was developed. It is assumed that you have already
# imported or created all the necessary search engines within your browser and obtained
# the uncompressed search.json file. The script will then work with this file, attempting
# to retrieve icons by leveraging three APIs: Google, Favicon Kit, or DuckDuckGo.
#
# Instruction:
# This script processes a search.json file, finds blocks with "_iconMapObj": null,
# and attempts to fetch and embed favicons as base64 Data URIs.
# It validates the fetched images and provides user interaction for invalid/missing icons.
# It then saves both a pretty-printed JSON file and a compressed .mozlz4 file.
#
# Requirements:
# - Python 3.x
# - requests library: pip install requests
# - lz4 library:      pip install lz4
# - Pillow library:   pip install pillow
#
# Usage:
# 1. Save the script as a .py file (e.g., iconize_search_engines.py).
# 2. Run from the command line:
#    - With argument:  python iconize_search_engines.py input.json
#    - Without argument: python iconize_search_engines.py  (You'll be prompted for the filename)
#
# The script will generate two files:
# - <original_filename>_iconized.json
# - <original_filename>_iconized.json.mozlz4


# Substrings looked for in the (lower-cased) Content-Type header, paired with
# the data-URI prefix used when embedding an image of that type.
_FAVICON_MIME_PREFIXES = (
	('image/png', "data:image/png;base64,"),
	('image/gif', "data:image/gif;base64,"),
	('image/x-icon', "data:image/x-icon;base64,"),
	('image/vnd.microsoft.icon', "data:image/x-icon;base64,"),
)


class _FaviconRejected(Exception):
	"""A downloaded resource that cannot be embedded as a 16x16 favicon.

	``str(exc)`` is the reason shown to the user. ``hint`` is the extra
	guidance (including its leading punctuation) that is appended only when
	the URL was typed in manually.
	"""

	def __init__(self, reason, hint):
		super().__init__(reason)
		self.hint = hint


def _download_favicon_data_uri(url, timeout=10):
	"""
	Downloads one URL and converts it to a base64 Data URI if it is a usable icon.

	Args:
		url: The URL to download.
		timeout: Seconds to wait for the server before giving up.

	Returns:
		A base64 Data URI string for the downloaded image.

	Raises:
		requests.exceptions.RequestException: The download failed or returned
			an HTTP error status.
		_FaviconRejected: The response is not a supported image type, cannot be
			opened as an image, or is not exactly 16x16 pixels.
	"""
	# Without a timeout a single unresponsive server would block the run forever.
	response = requests.get(url, timeout=timeout)
	response.raise_for_status()

	content_type = response.headers.get('content-type', '').lower()
	for marker, candidate_prefix in _FAVICON_MIME_PREFIXES:
		if marker in content_type:
			prefix = candidate_prefix
			break
	else:
		raise _FaviconRejected(
			f"Unsupported content type: {content_type}",
			". Please provide a valid image URL.",
		)

	# Validate image size using Pillow
	try:
		with Image.open(BytesIO(response.content)) as image:
			size = image.size
	except Exception as e:
		raise _FaviconRejected(
			f"Error validating image: {e}",
			". Please provide a valid image URL.",
		) from e
	if size != (16, 16):
		raise _FaviconRejected(
			f"Invalid image size: {size}. Expected (16, 16).",
			" Please provide a 16x16 image.",
		)

	return prefix + base64.b64encode(response.content).decode('utf-8')


def get_favicon_as_data_uri(hostname):
	"""
	Fetches a favicon from various APIs, validates it, and returns it as a base64 Data URI.

	The Google, Favicon Kit and DuckDuckGo endpoints are tried in that order.
	If none yields a valid 16x16 PNG/GIF/ICO image, the user is prompted for a
	URL until one works or they type 'skip'. A closed stdin (EOF) is treated
	like 'skip' so a non-interactive run does not crash.

	Args:
		hostname: The hostname for which to fetch the favicon.

	Returns:
		A base64 Data URI string representing the favicon, or None if no valid favicon could be found.
	"""

	apis = [
		f"https://www.google.com/s2/favicons?domain={hostname}&sz=16",
		f"https://api.faviconkit.com/{hostname}/16",
		f"https://icons.duckduckgo.com/ip3/{hostname}.ico",
	]

	for api_url in apis:
		print(f"Trying API: {api_url}")
		try:
			data_uri = _download_favicon_data_uri(api_url)
		except _FaviconRejected as e:
			print(f"  {e}")
			continue
		except requests.exceptions.RequestException as e:
			print(f"  Error fetching from {api_url}: {e}")
			continue
		print(f"  Successfully fetched and converted favicon from {api_url}")
		return data_uri

	# Manual input option if all APIs fail
	while True:
		try:
			user_input = input(f"Could not retrieve a valid favicon for {hostname} from any API.\n"
							   f"Enter a URL to a 16x16 favicon image, or type 'skip' to leave it as null: ")
		except EOFError:
			# No terminal attached: there is nobody to ask, so give up on this icon.
			print("\nNo interactive input available; leaving the icon as null.")
			return None

		# Tolerate stray whitespace around a pasted URL or the 'skip' keyword.
		user_input = user_input.strip()
		if user_input.lower() == 'skip':
			return None

		try:
			data_uri = _download_favicon_data_uri(user_input)
		except _FaviconRejected as e:
			print(f"  {e}{e.hint}")
		except requests.exceptions.RequestException as e:
			print(f"  Error fetching from provided URL: {e}. Please provide a valid URL or type 'skip'.")
		except Exception as e:
			# Interactive boundary: whatever went wrong with this URL, keep prompting.
			print(f"  Error with provided URL: {e}. Please provide a valid URL or type 'skip'.")
		else:
			print("  Successfully fetched and converted favicon from user-provided URL.")
			return data_uri


def compress_to_mozlz4(json_filepath, mozlz4_filepath):
	"""
	Writes a .mozlz4-compressed copy of a JSON file.

	The output consists of the 8-byte magic ``mozLz40\\0``, the uncompressed
	length as a 4-byte little-endian integer, and the LZ4 block produced with
	high compression (without lz4's own size prefix).

	Any error is reported on stdout instead of being raised.

	Args:
		json_filepath: Path of the JSON file to read.
		mozlz4_filepath: Path of the .mozlz4 file to create or overwrite.
	"""
	try:
		with open(json_filepath, 'rb') as source:
			raw = source.read()

		# Build the fixed header first, then the compressed payload.
		header = b'mozLz40\0' + len(raw).to_bytes(4, byteorder='little')
		payload = lz4.block.compress(raw, mode='high_compression', store_size=False)

		with open(mozlz4_filepath, 'wb') as target:
			target.write(header + payload)
		print(f"Successfully compressed JSON to: {mozlz4_filepath}")

	except Exception as error:
		print(f"Error during compression: {error}")


def _engine_hostname(block):
	"""
	Extracts the hostname of an engine's first URL template.

	Args:
		block: One engine dictionary from the 'engines' list.

	Returns:
		The hostname (without port or credentials), or None after printing the
		reason when the block has no usable URL template.
	"""
	urls = block.get('_urls')
	if not isinstance(urls, list) or not urls:
		print("  No '_urls' found in block.")
		return None

	first_url = urls[0]
	if not isinstance(first_url, dict) or 'template' not in first_url:
		print("  No 'template' key found in _urls.")
		return None

	template_url = first_url['template']
	try:
		# .hostname drops any ":port" / "user@" part that .netloc would keep;
		# the favicon services are queried by bare host name.
		hostname = urlparse(template_url).hostname
	except ValueError:
		print(f"  Could not parse URL: {template_url}")
		return None

	if not hostname:
		print(f"  Invalid URL template (no hostname): {template_url}")
		return None
	return hostname


def process_json_file(filepath):
	"""
	Processes the JSON file, updating _iconMapObj with favicon Data URIs,
	saves the updated JSON, and compresses it to .mozlz4.

	If '<name>_iconized.json' already exists it is only (re)compressed and the
	input file is not read. Otherwise every engine whose '_iconMapObj' is null
	gets an icon looked up for the hostname of its first URL template, and the
	result is written only when at least one icon was added. All problems are
	reported on stdout; nothing is raised for missing or malformed input.

	Args:
		filepath: The path to the JSON file.
	"""
	# splitext only removes an extension from the last path component, so dots
	# in directory names (e.g. "profile.default/search") are left alone.
	output_filepath = os.path.splitext(filepath)[0] + '_iconized.json'
	mozlz4_filepath = output_filepath + '.mozlz4'

	if os.path.exists(output_filepath):
		print(f"Iconized JSON file already exists: {output_filepath}")
		compress_to_mozlz4(output_filepath, mozlz4_filepath)
		return

	try:
		# JSON is UTF-8 regardless of the platform's default encoding;
		# 'utf-8-sig' also accepts a leading byte-order mark.
		with open(filepath, 'r', encoding='utf-8-sig') as f:
			data = json.load(f)
	except FileNotFoundError:
		print(f"Error: File not found: {filepath}")
		return
	except OSError as e:
		print(f"Error: Could not read file: {filepath} ({e})")
		return
	except (json.JSONDecodeError, UnicodeDecodeError):
		print(f"Error: Invalid JSON format in file: {filepath}")
		return

	if not isinstance(data, dict) or 'engines' not in data or not isinstance(data['engines'], list):
		print("Error: The JSON file should contain a dictionary with an 'engines' key holding a list of objects.")
		return

	changes_made = False

	for block in data['engines']:
		# Only engines that explicitly carry "_iconMapObj": null need an icon.
		if not isinstance(block, dict) or '_iconMapObj' not in block or block['_iconMapObj'] is not None:
			continue

		hostname = _engine_hostname(block)
		if hostname is None:
			continue

		print(f"Processing block with id: {block.get('id', 'N/A')}, hostname: {hostname}")
		data_uri = get_favicon_as_data_uri(hostname)
		if data_uri:
			block['_iconMapObj'] = {"16": data_uri}
			changes_made = True
		else:
			print(f"No valid favicon found or user skipped for: {hostname}")

	if not changes_made:
		print("No changes were made to the JSON file.")
		return

	try:
		with open(output_filepath, 'w', encoding='utf-8') as f:
			json.dump(data, f, indent=4)
	except OSError as e:
		print(f"Error writing to output file: {e}")
		return

	print(f"Successfully wrote updated JSON to: {output_filepath}")
	compress_to_mozlz4(output_filepath, mozlz4_filepath)


def main():
	"""
	Command-line entry point.

	Takes the JSON path from the optional positional argument; when it is
	absent (or empty) the user is prompted for it. The path is then handed to
	process_json_file.
	"""
	cli = argparse.ArgumentParser(description="Process JSON file and add favicons.")
	cli.add_argument("filepath", nargs='?', help="Path to the JSON file", default=None)
	options = cli.parse_args()

	# Fall back to an interactive prompt when no path was given on the command line.
	filepath = options.filepath or input("Enter the path to the JSON file: ")

	process_json_file(filepath)


if __name__ == "__main__":
	# Run the command-line interface only when executed as a script,
	# not when this module is imported.
	main()