Skip to content

Instantly share code, notes, and snippets.

@wareya
Created May 30, 2018 04:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wareya/f54b203f1a8d33369fe92268cb7f62d9 to your computer and use it in GitHub Desktop.
Save wareya/f54b203f1a8d33369fe92268cb7f62d9 to your computer and use it in GitHub Desktop.
kanjivg -> json, images (no stroke order/groups)
#!python
# Sample path data; within this file it is only referenced by a commented-out
# debug print.
mypath = "M26.75,23.5c1.48-0.24,3.35-0.52,5.49-0.84C46.03,20.62,63,17.75,74,17.25c4-0.18,6.09,0.97,5.5,4.75C76.25,42.75,77.25,71.75,92,87.75c6.95,7.54,5.75,1,6-5.5"

# Every letter the tokenizer accepts as a path command.
commands = "MmZzLlHhVvCcSsQqTtAaBb"

# Ordered [name, regex] pairs. Each regex captures one token at the start of
# the string in group 1 and the remaining text in group 2.
rules = [
    ["comma", r"^(,)(.*)"],
    # Optional sign, then digits with an optional fraction (or a bare
    # fraction), then an optional exponent such as "1e-5".
    ["number", r"^([+\-]?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)(?:[eE][+\-]?[0-9]+)?)(.*)"],
    # Built from ``commands`` so the accepted letters are listed only once.
    ["command", "^([" + commands + "])(.*)"],
]
import re
def tokenize(path):
    """Split the first token off the front of an SVG path string.

    Each pattern in the module-level ``rules`` list is tried, in order,
    against the start of ``path``.

    Args:
        path: Remaining path text; the caller is expected to have removed
            leading whitespace.

    Returns:
        A ``(token, rest)`` tuple for the first rule that matches, where
        ``rest`` is the text following the token, or ``None`` when no rule
        matches.
    """
    for _name, pattern in rules:
        # DOTALL lets the "rest" group run across line breaks; without it,
        # everything after the first newline would be silently dropped.
        match = re.match(pattern, path, re.DOTALL)
        if match:
            return (match.group(1), match.group(2))
    return None
def tokenize_path(path):
    """Break an SVG path string into a list of token strings.

    Whitespace between tokens is skipped and comma separators are dropped,
    so the result holds only command letters and numeric literals.
    Tokenizing stops at the first piece of text that ``tokenize`` does not
    recognize; the tokens collected up to that point are returned.

    Args:
        path: The path text to tokenize.

    Returns:
        List of token strings in source order.
    """
    tokens = []
    path = path.strip()
    while path:
        result = tokenize(path)
        # tokenize() signals "no rule matched" with None; unpacking that
        # directly would raise TypeError, so check before destructuring.
        if result is None:
            break
        token, rest = result
        if token != ",":
            tokens.append(token)
        # Strip here so trailing whitespace ends the loop cleanly instead of
        # being handed to tokenize() as an empty string.
        path = rest.strip()
    return tokens
def is_number(s):
    """Report whether ``s`` is accepted by ``float()``.

    Returns True when ``float(s)`` succeeds and False when it raises
    ValueError; any other exception propagates to the caller.
    """
    try:
        float(s)
    except ValueError:
        return False
    else:
        return True
def round_off(n):
    """Round ``n`` to two decimal places by scaling to hundredths and back."""
    hundredths = round(n * 100)
    return hundredths / 100
def tokens_to_path_data(tokens):
    """Convert SVG path tokens into a list of absolute cubic Bezier segments.

    Only the M/m (moveto), C/c (cubic curveto) and S/s (smooth cubic
    curveto) commands are understood. Extra argument groups following a
    C/c/S/s command are read as further segments of that same command.

    Args:
        tokens: Sequence of strings holding command letters and numeric
            literals, with separators already removed.

    Returns:
        A list of segments. Each segment is a list of four ``(x, y)`` tuples
        ``[start, control1, control2, end]`` in absolute coordinates, each
        coordinate passed through ``round_off``.

    Raises:
        SystemExit: On any token other than the supported commands, after
            printing the offending token and the whole token list. The exit
            status is 1 so the failure is visible to the calling process.
        IndexError: If a command is missing numeric arguments.
        ValueError: If an expected numeric argument is not numeric.
    """
    curx = 0
    cury = 0
    # Absolute second control point of the previous curve segment, or None
    # when the previous command was not a curve.
    prev_ctrl = None
    path_data = []
    count = len(tokens)
    i = 0

    def get(j):
        # Numeric argument located j positions after the current index i.
        return float(tokens[i + j])

    while i < count:
        token = tokens[i]
        if token in ("M", "m"):
            if token == "M":
                curx = get(1)
                cury = get(2)
            else:
                curx += get(1)
                cury += get(2)
            # A moveto breaks the curve chain: a following S/s must not
            # reflect a control point that belongs to an earlier subpath.
            prev_ctrl = None
            i += 3
        elif token in ("C", "c", "S", "s"):
            relative = token in ("c", "s")
            smooth = token in ("S", "s")
            arg_count = 4 if smooth else 6
            i += 1
            while True:
                if smooth:
                    if prev_ctrl is None:
                        # No previous curve: the first control point
                        # coincides with the current point.
                        x1, y1 = curx, cury
                    else:
                        # Reflect the previous second control point through
                        # the current point; the result is already absolute.
                        x1 = curx * 2 - prev_ctrl[0]
                        y1 = cury * 2 - prev_ctrl[1]
                    x2, y2, x, y = (get(k) for k in range(4))
                else:
                    x1, y1, x2, y2, x, y = (get(k) for k in range(6))
                    if relative:
                        x1 += curx
                        y1 += cury
                if relative:
                    x2 += curx
                    y2 += cury
                    x += curx
                    y += cury
                points = ((curx, cury), (x1, y1), (x2, y2), (x, y))
                # Only the emitted coordinates are rounded; the running
                # position stays unrounded so errors do not accumulate.
                path_data.append(
                    [(round_off(px), round_off(py)) for px, py in points]
                )
                prev_ctrl = (x2, y2)
                curx, cury = x, y
                i += arg_count
                # Another number right after this group means the command
                # repeats implicitly with a new argument group.
                if not (i < count and is_number(tokens[i])):
                    break
        else:
            print(f"unexpected token {token} at {i}")
            print(f"{tokens}")
            raise SystemExit(1)
    return path_data
#print(tokens_to_path_data(tokenize_path(mypath)))
def path_data_from_path(path):
    """Parse an SVG path string into Bezier segment data.

    Tokenizes ``path`` with ``tokenize_path`` and hands the tokens to
    ``tokens_to_path_data``, returning that function's result.
    """
    tokens = tokenize_path(path)
    return tokens_to_path_data(tokens)
import os, json
import numpy as np
from PIL import Image, ImageDraw
import aggdraw
from json import encoder
# Replace json.encoder.FLOAT_REPR with a formatter that renders floats with
# two decimal places.
# NOTE(review): on Python 3 the json encoder appears not to consult this
# attribute, so the assignment likely has no effect on json.dumps — confirm.
encoder.FLOAT_REPR = lambda o: format(o, '.2f')
# Batch conversion: for every stroke file in ``mydir``, write the parsed
# Bezier data to json/<name>.json and a rendered 120x120 grayscale image to
# images/<name>.png.
import xml.etree.ElementTree as ET

mydir = "kanjivg-master/kanji/"
size = 960  # side length of the supersampled drawing canvas, in pixels
# Scale from source coordinates to canvas pixels (109 is presumably the
# KanjiVG viewBox size — confirm). Coordinates are scaled by hand because,
# per the original author's note, canvas.settransform distorted small curves.
tf = size/109
pensize = 3*size/109 # use 3 for exactly the same thickness as kanjivg

# Create the output folders up front so the first write cannot fail with
# FileNotFoundError on a fresh checkout.
os.makedirs("json", exist_ok=True)
os.makedirs("images", exist_ok=True)

# Swap os.listdir(mydir) for e.g. ["087f2.svg"] to process a single file.
for filename in os.listdir(mydir):
    # Only "<name>.svg" files are converted; names containing "-" are skipped.
    if not filename.endswith(".svg") or "-" in filename:
        continue
    print(filename)
    rawfname = filename
    # Strip the extension properly; replacing every "svg" substring would
    # also rewrite a base name that happens to contain "svg".
    stem = os.path.splitext(rawfname)[0]
    filename = os.path.join(mydir, rawfname)

    tree = ET.parse(filename)
    strokes = [
        path_data_from_path(node.attrib["d"])
        for node in tree.iter("{http://www.w3.org/2000/svg}path")
    ]
    with open("json/" + stem + ".json", "w", encoding="utf-8") as f:
        f.write(json.dumps(strokes))

    im = Image.new("L", (size, size), "black")
    canvas = aggdraw.Draw(im)
    pen = aggdraw.Pen("white", width=pensize)
    brush = aggdraw.Brush("white")
    radius = pensize/2
    for stroke in strokes:
        # A path that produced no curve segments has nothing to draw.
        if not stroke:
            continue
        outline = aggdraw.Path()
        pen_pos = None
        for bezier in stroke:
            start, ctrl1, ctrl2, end = [(x*tf, y*tf) for x, y in bezier]
            # Re-anchor at the first segment and whenever a segment does not
            # start where the previous one ended (a new subpath).
            if bezier[0] != pen_pos:
                outline.moveto(start[0], start[1])
            outline.curveto(ctrl1[0], ctrl1[1], ctrl2[0], ctrl2[1], end[0], end[1])
            pen_pos = bezier[3]
        canvas.path(outline, pen)
        # Filled circles of pen diameter at both ends of the stroke.
        for px, py in (stroke[0][0], stroke[-1][-1]):
            cx = px*tf
            cy = py*tf
            canvas.ellipse((cx-radius, cy-radius, cx+radius, cy+radius), None, brush)
    canvas.flush()

    # Downsample the supersampled canvas to the final 120x120 image.
    im = im.resize((120, 120), Image.BOX)
    im.save("images/" + stem + ".png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment