# jlazovskis/color_rm_layers.py

## color_rm_layers.py
# Load packages
import os
import subprocess
from os import listdir

# Tested on output from rm v2.4.1.30
# Input:   in_file : pdf file from Remarkable
#         out_name : name of pdf file that will be produced
#          out_dir : directory in which produced pdf file will be
#       color_dict : dictionary of colors to use for layers. Keys are integers starting at 1. Colors are RGB (0-255) lists
#       page_start : which page to start at
#         page_end : which page to end at
#        tolerance : how many times to search for a particular layer before giving up
#
# Output: a pdf file with name out_name
#

def _remove_quietly(path):
	"""Delete path; a missing file is ignored, any other failure is only reported."""
	try:
		os.remove(path)
	except FileNotFoundError:
		pass
	except OSError as err:
		print('Warning: could not delete '+path+': '+str(err), flush=True)


def _export_layer(page_pdf, path_index, out_svg, tolerance):
	"""Export the object with id 'path<path_index>' from page_pdf into out_svg with Inkscape.

	Returns True as soon as out_svg exists after an attempt, and False once
	`tolerance` attempts have been made without the file appearing.
	"""
	# A leftover export from an earlier run must not be mistaken for this layer
	_remove_quietly(out_svg)
	for _ in range(tolerance):
		subprocess.run(['inkscape',
			'-p', page_pdf,
			'--export-id=path'+str(path_index),
			'--export-id-only',
			'--export-area-page',
			'-o', out_svg], stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
		# NOTE(review): presumably Inkscape writes no file when the id is absent - confirm.
		# For grids which have layers as lines, a size check (st_size > 1707) was once considered here.
		if os.path.isfile(out_svg):
			return True
	return False


def _recolor_svg(svg_path, rgb):
	"""Rewrite svg_path in place, replacing '0%,0%,0%' with the comma separated values of rgb."""
	new_color = ', '.join(str(channel) for channel in rgb)
	with open(svg_path, 'r') as file:
		filedata = file.read()
	with open(svg_path, 'w') as file:
		file.write(filedata.replace('0%,0%,0%', new_color))


def color_rm_layers(in_file, out_name, out_dir, color_dict, page_start, page_end, tolerance=20):
	"""Recolor the layers of a Remarkable pdf and write the result to out_name inside out_dir.

	Every page in the range is split off with pdftk, each layer listed in color_dict
	is exported to svg with Inkscape, the string '0%,0%,0%' in that svg is replaced
	by the layer color, and the recolored layers are recombined with pdftk.

	in_file    : pdf file from Remarkable (a leading '~' is expanded)
	out_name   : name of pdf file that will be produced
	out_dir    : directory for the produced pdf and all temporary files (a leading '~' is expanded)
	color_dict : colors per layer; key k is looked up as the object with id 'path<2+2k>',
	             values are RGB lists
	page_start : first page to process
	page_end   : last page to process (inclusive)
	tolerance  : how many export attempts to make for a layer before giving up

	Returns None. Requires the 'pdftk' and 'inkscape' executables.
	"""
	# subprocess and the os functions take paths verbatim, so '~' has to be expanded here
	in_file = os.path.expanduser(in_file)
	out_dir = os.path.expanduser(out_dir)

	# Set up lists for storing file names
	page_pdfs = []; temp_files = []; out_files = []
	page_num = page_end - page_start + 1
	if page_num < 1:
		print('No pages to process: page_end is before page_start', flush=True)
		return

	# Separate files
	for p in range(page_start, page_end+1):
		page_pdf = os.path.join(out_dir, 'page'+str(p-page_start+1)+'.pdf')
		subprocess.run(['pdftk', in_file, 'cat', str(p), 'output', page_pdf])
		page_pdfs.append(page_pdf)
	print('Separated file into '+str(page_num)+' pages', flush=True)

	# Set up colors
	color_list = sorted(color_dict)
	print('Will look for '+str(len(color_list))+' layers to color', flush=True)
	if color_list and color_list[-1]-color_list[0] > len(color_list)-1:
		print('Warning: color_dict has skipped keys, missing layers will be ignored', flush=True)

	# Split every page
	print('Splitting and coloring layers', flush=True)
	for p_ind, page_pdf in enumerate(page_pdfs, start=1):
		page_label = 'Page '+str(p_ind)+'/'+str(page_num)
		page_prefix = os.path.join(out_dir, 'page'+str(p_ind))

		# Split by layer
		layers_found = []
		for color_index in color_list:
			path_index = 2+color_index*2
			print(page_label+': Looking for layer '+str(color_index)+' ... ', flush=True, end='')
			layer_svg = page_prefix+'_path'+str(path_index)+'.svg'
			if _export_layer(page_pdf, path_index, layer_svg, tolerance):
				print('found', flush=True)
				layers_found.append((color_index, path_index))
				# Delete later
				temp_files.append(layer_svg)
			else:
				print('not found', flush=True)

		# Recolor by layer
		print(page_label+': Recoloring layers', flush=True)
		layer_pdfs = []
		for color_index, path_index in layers_found:
			layer_svg = page_prefix+'_path'+str(path_index)+'.svg'
			layer_pdf = page_prefix+'_path'+str(path_index)+'.pdf'
			_recolor_svg(layer_svg, color_dict[color_index])
			subprocess.run(['inkscape', '-p', layer_svg, '-o', layer_pdf], stdout=subprocess.DEVNULL)
			# Delete later
			temp_files.append(layer_pdf)
			layer_pdfs.append(layer_pdf)

		# Combine colored layers
		print(page_label+': Combining colored layers into one pdf', flush=True)
		if not layer_pdfs:
			print(page_label+': Warning: no layers found, page will be missing from the output', flush=True)
			continue

		# Start from the lowest layer and put each following layer on top of the running result.
		# A page with a single layer needs no combining and is used as it is.
		composite = layer_pdfs[0]
		last = len(layer_pdfs)-1
		for position, foreground in enumerate(layer_pdfs[1:], start=1):
			if position == last:
				target = page_prefix+'_colored.pdf'
			else:
				target = page_prefix+'_temp'+str(position)+'.pdf'
				temp_files.append(target)
			subprocess.run(['pdftk', foreground,
				'background', composite,
				'output', target])
			composite = target
		out_files.append(composite)

	# Combine all the colored pages together
	if out_files:
		print('Combining '+str(page_num)+' colored pages into 1 pdf', flush=True)
		subprocess.run(['pdftk']+out_files+['cat', 'output', os.path.join(out_dir, out_name)])
	else:
		print('Warning: no colored pages were produced, '+out_name+' was not written', flush=True)

	# Delete temporary files (a single-layer page is listed twice, so drop duplicates first)
	print('Deleting temporary files', flush=True)
	for f in dict.fromkeys(out_files+temp_files+page_pdfs):
		_remove_quietly(f)

if __name__ == "__main__":
	# Example run: color layers 1-3 on pages 4 to 10 of ~/notebook.pdf and write
	# notebook_colored.pdf into the home directory.
	# '~' is expanded explicitly because the paths are handed to subprocess and listdir
	# as they are, without any shell expansion.
	color_rm_layers(os.path.expanduser('~/notebook.pdf'), 'notebook_colored.pdf', os.path.expanduser('~/'),
		{1:[0,0,0], 2:[85,144,217], 3:[231,102, 106]}, 4, 10)