Created
May 30, 2018 04:36
-
-
Save wareya/f54b203f1a8d33369fe92268cb7f62d9 to your computer and use it in GitHub Desktop.
kanjivg -> json, images (no stroke order/groups)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python | |
# Sample SVG path-data string, handy for trying the tokenizer by hand.
mypath = "M26.75,23.5c1.48-0.24,3.35-0.52,5.49-0.84C46.03,20.62,63,17.75,74,17.25c4-0.18,6.09,0.97,5.5,4.75C76.25,42.75,77.25,71.75,92,87.75c6.95,7.54,5.75,1,6-5.5"

# Every single-letter path command the tokenizer recognises.
commands = "MmZzLlHhVvCcSsQqTtAaBb"

# Tokenizer rules as [name, pattern] pairs, tried in order against the start
# of the remaining input.  Group 1 is the token, group 2 is the rest.
rules = [
    ["comma", r"^(,)(.*)"],
    # Optional sign, then digits with an optional fraction (or a bare
    # fraction such as ".5"), then an optional exponent so that values
    # like "1e-5" stay a single token.
    ["number", r"^([+\-]?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)(?:[eE][+\-]?[0-9]+)?)(.*)"],
    # Built from ``commands`` so the two definitions cannot drift apart.
    ["command", rf"^([{commands}])(.*)"],
]
import re


def tokenize(path):
    """Split the first token off the front of a path-data string.

    The patterns in the module-level ``rules`` table are tried in order
    against the start of *path*; the first one that matches wins.

    Args:
        path: Remaining path data.  Leading whitespace is not skipped here,
            so the caller must strip it first.

    Returns:
        A ``(token, rest)`` tuple, where ``rest`` is everything that follows
        the token, or ``None`` when no rule matches (including when *path*
        is empty).
    """
    for _name, pattern in rules:
        # DOTALL lets the trailing ``(.*)`` group run across line breaks.
        # Without it, everything after the first newline of a multi-line
        # path string was silently thrown away.
        match = re.match(pattern, path, re.DOTALL)
        if match:
            return match.group(1), match.group(2)
    return None
def tokenize_path(path):
    """Break a path-data string into a flat list of token strings.

    Whitespace between tokens is skipped and comma separators are dropped,
    so the result contains only the command letters and numeric literals
    matched by ``tokenize``.

    Args:
        path: The path-data string to tokenize.

    Returns:
        The tokens in input order.  Tokenizing stops at the first position
        where ``tokenize`` finds no match; the tokens collected up to that
        point are returned.
    """
    tokens = []
    path = path.strip()
    while path:
        result = tokenize(path)
        # ``tokenize`` returns None when nothing matches.  Unpacking that
        # directly raised TypeError, so check before unpacking.
        if result is None:
            break
        token, rest = result
        if not token:
            break
        if token != ",":
            tokens.append(token)
        path = rest.strip()
    return tokens
def is_number(s):
    """Return True if ``float(s)`` succeeds, otherwise False.

    Args:
        s: Value to probe, normally a token string.

    Returns:
        True when *s* converts to a float.  False when the conversion
        raises ValueError (non-numeric text) or TypeError (a value such as
        None that ``float`` does not accept at all).
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
def round_off(n):
    """Round *n* to two decimal places.

    The value is scaled by 100, rounded to the nearest integer with the
    built-in ``round``, and scaled back down, so the result is always a
    float.

    Args:
        n: The number to round.

    Returns:
        *n* rounded to hundredths.
    """
    return round(n * 100) / 100
def tokens_to_path_data(tokens):
    """Convert tokenized SVG path data into a list of cubic Bezier segments.

    Only the move (``M``/``m``), cubic curve (``C``/``c``) and smooth cubic
    curve (``S``/``s``) commands are handled.  Upper-case commands take
    absolute coordinates; lower-case commands take coordinates relative to
    the current point.  A curve command may be followed by several
    coordinate groups, each producing one segment.

    Args:
        tokens: Token strings (command letters and numeric literals), as
            produced by ``tokenize_path``.

    Returns:
        A list of segments.  Each segment is a list of four ``(x, y)``
        tuples -- start point, first control point, second control point,
        end point -- in absolute coordinates rounded with ``round_off``.

    Raises:
        SystemExit: If a token other than the handled commands appears
            where a command is expected.  The token and the full token
            list are printed first.
        IndexError: If a command is followed by too few tokens.
        ValueError: If a coordinate token is not numeric.
    """
    curx = 0
    cury = 0
    # Second control point of the previous curve segment, or None when the
    # previous command was not a curve.  S/s reflect this point through the
    # current point to obtain their implicit first control point.
    prev_ctrl = None
    path_data = []
    i = 0

    def get(j):
        # ``i`` is looked up at call time, so this always reads relative to
        # the current cursor position.
        return float(tokens[i + j])

    while i < len(tokens):
        token = tokens[i]
        if token in ("M", "m"):
            if token == "M":
                curx = get(1)
                cury = get(2)
            else:
                curx += get(1)
                cury += get(2)
            # A move starts a new subpath: a following S/s must not reflect
            # a control point left over from an earlier curve.
            prev_ctrl = None
            i += 3
        elif token in ("C", "c", "S", "s"):
            relative = token in ("c", "s")
            smooth = token in ("S", "s")
            # S/s supply two points per segment, C/c supply three.
            count = 4 if smooth else 6
            i += 1
            while True:
                # Offset added to every explicit coordinate of this segment.
                ox, oy = (curx, cury) if relative else (0, 0)
                nums = [get(k) for k in range(count)]
                if smooth:
                    if prev_ctrl is None:
                        # No preceding curve: the first control point
                        # coincides with the current point.
                        ctrl1 = (curx, cury)
                    else:
                        ctrl1 = (curx * 2 - prev_ctrl[0],
                                 cury * 2 - prev_ctrl[1])
                else:
                    ctrl1 = (nums[0] + ox, nums[1] + oy)
                    nums = nums[2:]
                ctrl2 = (nums[0] + ox, nums[1] + oy)
                end = (nums[2] + ox, nums[3] + oy)
                segment = [(curx, cury), ctrl1, ctrl2, end]
                path_data.append(
                    [(round_off(px), round_off(py)) for px, py in segment]
                )
                prev_ctrl = ctrl2
                curx, cury = end
                i += count
                # Another number here means another coordinate group for
                # the same command; anything else ends the command.
                if not (i < len(tokens) and is_number(tokens[i])):
                    break
        else:
            print(f"unexpected token {token} at {i}")
            print(f"{tokens}")
            # Raise SystemExit directly instead of calling the site-provided
            # ``exit()`` helper, and use a non-zero status for the failure.
            raise SystemExit(1)
    return path_data
#print(tokens_to_path_data(tokenize_path(mypath))) | |
def path_data_from_path(path):
    """Parse a path-data string straight into Bezier segment data.

    Convenience wrapper that runs ``tokenize_path`` on *path* and feeds the
    resulting tokens to ``tokens_to_path_data``.

    Args:
        path: The path-data string, e.g. the ``d`` attribute of an SVG
            ``<path>`` element.

    Returns:
        Whatever ``tokens_to_path_data`` returns for the tokenized input.
    """
    return tokens_to_path_data(tokenize_path(path))
import json
import os
import xml.etree.ElementTree as ET
from json import encoder

import aggdraw
import numpy as np
from PIL import Image, ImageDraw

# NOTE(review): this override appears to have no effect on Python 3's json
# encoder; the coordinates are already rounded by round_off. Confirm before
# removing.
encoder.FLOAT_REPR = lambda o: format(o, '.2f')

mydir = "kanjivg-master/kanji/"

# Make sure both output directories exist before anything is written.
os.makedirs("json", exist_ok=True)
os.makedirs("images", exist_ok=True)

for filename in os.listdir(mydir):#["087f2.svg"]:
    rawfname = filename
    if not filename.endswith(".svg"):
        continue
    # Files with a dash in the name are skipped (presumably variant forms --
    # TODO confirm).
    if "-" in filename:
        continue
    print(filename)
    filename = os.path.join(mydir, filename)
    # Swap only the extension; str.replace("svg", ...) would also rewrite
    # any "svg" that happened to appear earlier in the name.
    stem = os.path.splitext(rawfname)[0]

    # One list of Bezier segments per <path> element in the SVG.
    tree = ET.parse(filename)
    strokes = [
        path_data_from_path(path.attrib["d"])
        for path in tree.iter("{http://www.w3.org/2000/svg}path")
    ]

    with open(os.path.join("json", stem + ".json"), "w", encoding="utf-8") as f:
        json.dump(strokes, f)

    # Render at high resolution, then shrink with a box filter below.
    size = 960
    im = Image.new("L", (size, size), "black")
    canvas = aggdraw.Draw(im)
    # Coordinates are scaled by hand: canvas.settransform() was tried and
    # distorted small curves.  109 is presumably the side length of the
    # source drawing area -- TODO confirm.
    tf = size/109
    pensize = 3*size/109 # use 3 for exactly the same thickness as kanjivg
    pen = aggdraw.Pen("white", width=pensize)
    brush = aggdraw.Brush("white")
    radius = pensize/2

    for stroke in strokes:
        # A path with no curve segments has nothing to draw and would
        # otherwise fail on stroke[0] below.
        if not stroke:
            continue
        first = (stroke[0][0][0]*tf, stroke[0][0][1]*tf)
        last = (stroke[-1][-1][0]*tf, stroke[-1][-1][1]*tf)

        path = aggdraw.Path()
        path.moveto(first[0], first[1])
        for bezier in stroke:
            bezier = [(x[0]*tf, x[1]*tf) for x in bezier]
            path.curveto(bezier[1][0], bezier[1][1], bezier[2][0], bezier[2][1], bezier[3][0], bezier[3][1])
        canvas.path(path, pen)

        # Filled circles at both ends of the stroke give it round caps.
        canvas.ellipse((first[0]-radius, first[1]-radius, first[0]+radius, first[1]+radius), None, brush)
        canvas.ellipse((last[0]-radius, last[1]-radius, last[0]+radius, last[1]+radius), None, brush)
    canvas.flush()

    # Box-filter downsample to the final 120x120 image (this replaced an
    # earlier hand-written block-averaging loop).
    im = im.resize((120, 120), Image.BOX)
    im.save(os.path.join("images", stem + ".png"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment