wareya/dump.py

## dump.py
#!python

# Sample path data (mixed absolute/relative cubic curves), kept around for
# manually exercising the tokenizer and converter.
mypath = (
    "M26.75,23.5"
    "c1.48-0.24,3.35-0.52,5.49-0.84"
    "C46.03,20.62,63,17.75,74,17.25"
    "c4-0.18,6.09,0.97,5.5,4.75"
    "C76.25,42.75,77.25,71.75,92,87.75"
    "c6.95,7.54,5.75,1,6-5.5"
)

# Every single-letter command the tokenizer accepts.
commands = "MmZzLlHhVvCcSsQqTtAaBb"

# Tokenizer rules as [name, pattern] pairs, tried in list order.  Each
# pattern captures the token in group 1 and the untouched remainder of the
# input in group 2.
rules = [
    ["comma", r"^(,)(.*)"],
    ["number", r"^([+\-]?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+))(.*)"],
    ["command", "^([" + commands + "])(.*)"],
]

import re

def tokenize(path):
    """Split the leading token off *path*.

    The entries of the module-level ``rules`` list are tried in order; the
    first pattern that matches at the start of *path* wins.

    Args:
        path: remaining path data; the token must start at index 0, so the
            caller is expected to have stripped leading whitespace.

    Returns:
        A ``(token, rest)`` tuple, or ``None`` when no rule matches.
    """
    for rule in rules:
        # DOTALL lets the "rest" group run across line breaks.  Without it
        # the remainder stops at the first newline and every command after
        # that newline is silently dropped.
        match = re.match(rule[1], path, re.DOTALL)
        if match:
            return (match.group(1), match.group(2))
    return None

def tokenize_path(path):
    """Break SVG path data into a flat list of token strings.

    Whitespace between tokens is skipped and comma separators are dropped,
    so the result contains only command letters and numeric literals.

    Args:
        path: the raw path data string (e.g. the ``d`` attribute).

    Returns:
        A list of token strings in input order.

    Raises:
        ValueError: if the input contains text no tokenizer rule matches.
    """
    tokens = []
    # Strip before testing for emptiness so trailing or whitespace-only input
    # ends the loop instead of handing an empty string to tokenize().
    path = path.strip()
    while path:
        result = tokenize(path)
        if result is None:
            # tokenize() signals "no rule matched" with None; unpacking that
            # directly would fail with an unhelpful TypeError.
            raise ValueError(f"cannot tokenize path data at {path[:20]!r}")
        token, path = result
        if token != ",":
            tokens.append(token)
        path = path.strip()
    return tokens

def is_number(s):
    """Return True when ``float(s)`` succeeds, False when it raises ValueError."""
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True

def round_off(n):
    """Scale *n* by 100, round to an integer, and scale back down."""
    hundredths = round(n * 100)
    return hundredths / 100

def tokens_to_path_data(tokens):
    """Convert a tokenized SVG path into a list of cubic Bezier segments.

    Supported commands are ``M``/``m`` (move), ``C``/``c`` (cubic curve) and
    ``S``/``s`` (smooth cubic curve).  Curve commands may be followed by
    several argument groups; each group produces one segment.

    Args:
        tokens: sequence of strings as produced by ``tokenize_path`` --
            command letters and numeric literals, commas already removed.

    Returns:
        A list of segments.  Each segment is a list of four ``(x, y)`` tuples
        in absolute coordinates, passed through ``round_off``: start point,
        first control point, second control point, end point.

    Raises:
        ValueError: on a token that is not a supported command, or when an
            argument is not numeric.
        IndexError: when a command has fewer arguments than it requires.
    """
    curx = 0
    cury = 0
    # Second control point of the most recent curve segment.  S/s reflect it
    # about the current point to get their first control point.  When no
    # curve precedes the S/s it equals the current point, so the reflection
    # collapses onto the current point.
    lastx2 = 0
    lasty2 = 0
    path_data = []
    i = 0

    def get(j):
        # Numeric argument j positions after the current index ``i``.
        return float(tokens[i+j])

    while i < len(tokens):
        token = tokens[i]
        if token in ("M", "m"):
            if token == "M":
                curx = get(1)
                cury = get(2)
            else:
                curx += get(1)
                cury += get(2)
            # A move starts a new subpath: forget the previous curve's
            # control point so a following S/s does not reflect a stale one.
            lastx2 = curx
            lasty2 = cury
            i += 3
        elif token in ("C", "c", "S", "s"):
            smooth = token in ("S", "s")
            relative = token in ("c", "s")
            nargs = 4 if smooth else 6
            i += 1
            while True:
                if smooth:
                    # First control point is the reflection of the previous
                    # second control point about the current point.
                    x1 = curx*2 - lastx2
                    y1 = cury*2 - lasty2
                    x2, y2, x, y = (get(j) for j in range(4))
                else:
                    x1, y1, x2, y2, x, y = (get(j) for j in range(6))
                    if relative:
                        x1 += curx
                        y1 += cury
                if relative:
                    x2 += curx
                    y2 += cury
                    x += curx
                    y += cury
                segment = [(curx, cury), (x1, y1), (x2, y2), (x, y)]
                path_data.append(
                    [(round_off(px), round_off(py)) for px, py in segment]
                )
                lastx2 = x2
                lasty2 = y2
                curx = x
                cury = y
                i += nargs
                # Another number right after the arguments means the command
                # is repeated implicitly with a further argument group.
                if i >= len(tokens) or not is_number(tokens[i]):
                    break
        else:
            # Raise instead of calling exit(): the site-provided exit()
            # terminates with status 0 and cannot be handled by callers.
            raise ValueError(f"unexpected token {token} at {i} in {tokens}")
    return path_data

#print(tokens_to_path_data(tokenize_path(mypath)))

def path_data_from_path(path):
    """Tokenize raw path data and convert the tokens to Bezier segments."""
    tokens = tokenize_path(path)
    return tokens_to_path_data(tokens)

import os, json
import numpy as np
from PIL import Image, ImageDraw
import aggdraw

from json import encoder
# Override the json encoder module's FLOAT_REPR hook with a two-decimal
# formatter.
# NOTE(review): current CPython 3 encoders appear not to consult this
# attribute, so this may be a no-op -- confirm.  Coordinates are already
# passed through round_off() before they are serialized.
encoder.FLOAT_REPR = lambda o: format(o, '.2f')

# Convert every plain KanjiVG SVG in ``mydir`` into a JSON file of Bezier
# segments per stroke ("json/<name>.json") and a 120x120 grayscale rendering
# ("images/<name>.png").
import xml.etree.ElementTree as ET  # hoisted: was re-imported on every iteration

mydir = "kanjivg-master/kanji/"

# Create the output directories up front so a fresh checkout does not fail
# on the first write.
os.makedirs("json", exist_ok=True)
os.makedirs("images", exist_ok=True)

# Strokes are drawn on a large canvas and then box-filtered down to 120x120.
size = 960
# Scale from path coordinates to canvas pixels.
# NOTE(review): 109 is presumably the KanjiVG viewBox size -- confirm.
tf = size/109
pensize = 3*size/109 # use 3 for exactly the same thickness as kanjivg

for filename in os.listdir(mydir):
    rawfname = filename
    # Only plain "<name>.svg" files are processed; names containing "-" are
    # skipped.
    if not filename.endswith(".svg") or "-" in filename:
        continue
    print(filename)
    filename = os.path.join(mydir, filename)
    # Swap only the extension; str.replace("svg", ...) would also rewrite an
    # "svg" occurring elsewhere in the name.
    stem = os.path.splitext(rawfname)[0]

    tree = ET.parse(filename)
    strokes = [
        path_data_from_path(element.attrib["d"])
        for element in tree.iter("{http://www.w3.org/2000/svg}path")
    ]
    with open(os.path.join("json", stem + ".json"), "w", encoding="utf-8") as f:
        f.write(json.dumps(strokes))

    im = Image.new("L", (size, size), "black")
    canvas = aggdraw.Draw(im)
    # Coordinates are scaled by hand rather than with canvas.settransform(),
    # which distorted small curves.
    pen = aggdraw.Pen("white", width=pensize)
    brush = aggdraw.Brush("white")
    radius = pensize/2

    for stroke in strokes:
        if not stroke:
            # A path without curve segments has nothing to draw.
            continue
        path = aggdraw.Path()
        position = None
        for bezier in stroke:
            if bezier[0] != position:
                # Start of the stroke, or a new subpath after a move command:
                # jump there instead of drawing a connecting curve.
                path.moveto(bezier[0][0]*tf, bezier[0][1]*tf)
            (x1, y1), (x2, y2), (x3, y3) = bezier[1:]
            path.curveto(x1*tf, y1*tf, x2*tf, y2*tf, x3*tf, y3*tf)
            position = bezier[3]
        canvas.path(path, pen)

        # Round caps: a filled circle of pen width at both ends of the stroke.
        first = (stroke[0][0][0]*tf, stroke[0][0][1]*tf)
        last = (stroke[-1][-1][0]*tf, stroke[-1][-1][1]*tf)
        for cx, cy in (first, last):
            canvas.ellipse((cx-radius, cy-radius, cx+radius, cy+radius), None, brush)

    # One flush per image is enough to copy the drawing into ``im``.
    canvas.flush()

    im = im.resize((120, 120), Image.BOX)

    im.save(os.path.join("images", stem + ".png"))
	#!python

	mypath = "M26.75,23.5c1.48-0.24,3.35-0.52,5.49-0.84C46.03,20.62,63,17.75,74,17.25c4-0.18,6.09,0.97,5.5,4.75C76.25,42.75,77.25,71.75,92,87.75c6.95,7.54,5.75,1,6-5.5"


	commands = "MmZzLlHhVvCcSsQqTtAaBb";

	rules = [
	["comma", r"^(,)(.*)"],
	["number", r"^([+\-]?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+))(.*)"],
	["command", r"^([MmZzLlHhVvCcSsQqTtAaBb])(.*)"]
	]

	import re

	def tokenize(path):
	for rule in rules:
	match = re.match(rule[1], path)
	if match:
	return (match.group(1), match.group(2))
	return None

	def tokenize_path(path):
	tokens = []
	while path != "":
	path = path.strip()
	token, path = tokenize(path)
	if not token:
	break
	if token != ",":
	tokens += [token]
	return tokens

	def is_number(s):
	try:
	float(s)
	return True
	except ValueError:
	return False

	def round_off(n):
	return round(n*100)/100

	def tokens_to_path_data(tokens):
	curx = 0
	cury = 0
	path_data = []
	i = 0
	while i < len(tokens):
	def get(j):
	return float(tokens[i+j])
	token = tokens[i]
	if token in "Mm":
	if token == "M":
	curx = get(1)
	cury = get(2)
	else:
	curx += get(1)
	cury += get(2)
	i += 3
	elif token in "Cc":
	loop = True
	pathlist = []
	i += 1
	while loop:
	loop = False
	x1 = get(0)
	y1 = get(1)
	x2 = get(2)
	y2 = get(3)
	x = get(4)
	y = get(5)
	if i+6 < len(tokens) and is_number(tokens[i+6]):
	loop = True
	if token == "C":
	coords = [(curx, cury), (x1, y1), (x2, y2), (x, y)]
	lastx2 = x2
	lasty2 = y2
	lastx = x
	lasty = y
	else:
	coords = [(curx, cury), (x1+curx, y1+cury), (x2+curx, y2+cury), (x+curx, y+cury)]
	lastx2 = x2+curx
	lasty2 = y2+cury
	lastx = x+curx
	lasty = y+cury
	coords = [(round_off(x[0]), round_off(x[1])) for x in coords]
	pathlist += [coords]
	curx = lastx
	cury = lasty
	i += 6
	path_data += pathlist
	elif token in "Ss":
	loop = True
	pathlist = []
	i += 1
	while loop:
	loop = False
	#x1 = get(0)
	#y1 = get(1)
	x1 = lastx*2 - lastx2
	y1 = lasty*2 - lasty2
	x2 = get(0)
	y2 = get(1)
	x = get(2)
	y = get(3)
	if i+4 < len(tokens) and is_number(tokens[i+4]):
	loop = True
	if token == "S":
	coords = [(curx, cury), (x1, y1), (x2, y2), (x, y)]
	lastx2 = x2
	lasty2 = y2
	lastx = x
	lasty = y
	else:
	coords = [(curx, cury), (x1, y1), (x2+curx, y2+cury), (x+curx, y+cury)]
	lastx2 = x2+curx
	lasty2 = y2+cury
	lastx = x+curx
	lasty = y+cury
	coords = [(round_off(x[0]), round_off(x[1])) for x in coords]
	pathlist += [coords]
	curx = lastx
	cury = lasty
	i += 4
	path_data += pathlist
	else:
	print(f"unexpected token {token} at {i}")
	print(f"{tokens}")
	exit()
	return path_data

	#print(tokens_to_path_data(tokenize_path(mypath)))

	def path_data_from_path(path):
	return tokens_to_path_data(tokenize_path(path))

	import os, json
	import numpy as np
	from PIL import Image, ImageDraw
	import aggdraw

	from json import encoder
	encoder.FLOAT_REPR = lambda o: format(o, '.2f')

	mydir = "kanjivg-master/kanji/"
	for filename in os.listdir(mydir):#["087f2.svg"]:
	rawfname = filename
	if not filename.endswith(".svg"):
	continue
	if "-" in filename:
	continue
	print(filename)
	filename = mydir+filename
	import xml.etree.ElementTree as ET
	tree = ET.parse(filename)
	strokes = []
	for path in tree.iter("{http://www.w3.org/2000/svg}path"):
	strokes += [path_data_from_path(path.attrib["d"])]
	with open("json/"+rawfname.replace("svg", "json"), "w", encoding="utf-8") as f:
	f.write(json.dumps(strokes))

	size = 960

	im = Image.new("L", (size, size), "black")
	canvas = aggdraw.Draw(im)
	#canvas.settransform((192*4/109, 0, 0, 0, (192*4/109), 0)) # fucks up small curves
	tf = size/109
	pensize = 3*size/109 # use 3 for exactly the same thickness as kanjivg
	pen = aggdraw.Pen("white", width=pensize)
	brush = aggdraw.Brush("white")

	for stroke in strokes:
	path = aggdraw.Path()
	path.moveto(stroke[0][0][0]*tf, stroke[0][0][1]*tf)
	for bezier in stroke:
	bezier = [(x[0]*tf, x[1]*tf) for x in bezier]
	#path = aggdraw.Path()
	#path.moveto(bezier[0][0], bezier[0][1])
	path.curveto(bezier[1][0], bezier[1][1], bezier[2][0], bezier[2][1], bezier[3][0], bezier[3][1])
	canvas.path(path, pen)

	first = (stroke[0][0][0]*tf, stroke[0][0][1]*tf)
	last = (stroke[-1][-1][0]*tf, stroke[-1][-1][1]*tf)
	canvas.ellipse((first[0]-pensize/2, first[1]-pensize/2, first[0]+pensize/2, first[1]+pensize/2), None, brush)
	canvas.ellipse((last[0]-pensize/2, last[1]-pensize/2, last[0]+pensize/2, last[1]+pensize/2), None, brush)

	canvas.flush()

	im = im.resize((120, 120), Image.BOX)

	#array = np.asarray(im)
	#newarray = np.zeros((48, 48))
	#for x in range(48):
	# for y in range(48):
	# value = 0
	# for i in range(16):
	# for j in range(16):
	# value += array[x*16+i, y*16+j]
	# value /= 16*16
	# newarray[x, y] = value
	#array = newarray
	#array = array.reshape((48, 48))
	#array = array.astype("uint8")
	#im = Image.fromarray(array)

	im.save("images/"+rawfname.replace("svg", "png"))