Skip to content

Instantly share code, notes, and snippets.

@Yoxem
Created May 22, 2021 22:25
Show Gist options
  • Save Yoxem/d7150a50f45334a6846b58fe3a6551ad to your computer and use it in GitHub Desktop.
Save Yoxem/d7150a50f45334a6846b58fe3a6551ad to your computer and use it in GitHub Desktop.
Knuth Line-breaking implementation in Python
#!/usr/bin/env python3
#-*-coding:utf-8-*-
# License: GPLv3
import functools
import math
import re
line_width = 40
#input_text = "aaa bb cc ddddd"
input_text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vestibulum gravida semper elit, ac porttitor arcu convallis in. Vivamus iaculis neque pretium, porttitor purus quis, elementum sem. Pellentesque mollis ultricies hendrerit. Nunc ac dictum elit, ut malesuada mi. Donec fringilla eu nunc pellentesque aliquam. Nunc neque sem, interdum in ligula vitae, lobortis vestibulum neque. Praesent placerat gravida volutpat. "
input_splitted = re.split('(\s+)',input_text);
input_splitted_len = len(input_splitted)
punish_2d_array = [[None for _ in range(input_splitted_len)] for _ in range(input_splitted_len)]
print(input_splitted)
def punish(i,j, line_width, input_splitted):
if punish_2d_array[i][j] != None:
return punish_2d_array[i][j]
else:
input_splitted_fragment = input_splitted[i:j+1]
input_splitted_fragment_length = map(lambda x: len(x), \
input_splitted_fragment)
total_fragment_length = sum(input_splitted_fragment_length)
if line_width < total_fragment_length:
res = math.inf
else:
res = (line_width - total_fragment_length) ** 2
punish_2d_array[i][j] = res
return res
def line_break(input_splitted, line_width):
line_break_table = []
for i in range(len(input_splitted)):
# prev 指上一個斷句的詞 array index。
line_break_table.append({"prev": "", "value": math.inf})
def line_break_aux(n):
nonlocal line_break_table
if line_break_table[n]["value"] != math.inf:
return line_break_table[n]["value"]
i = 0 # 起始索引
if re.match('\s+', input_splitted[0]):
i = 1
total_punish_val = punish(0,n, line_width, input_splitted)
if total_punish_val == math.inf:
min_cost_array = [math.inf] * n
for j in range(i,n, 2):
rhs = punish(j+2,n, line_width, input_splitted)
if rhs == math.inf:
min_cost_array[j] = math.inf
else:
min_cost_array[j] = line_break_aux(j) + rhs
print("min_cost_array[%d] = %f" % (j , min_cost_array[j] ))
print(min_cost_array)
res = min(min_cost_array)
res_index = min_cost_array.index(res)
if line_break_table[n]["value"] > res:
line_break_table[n]["prev"] = res_index
line_break_table[n]["value"] = res
return res
else:
line_break_table[n]["value"] = total_punish_val
return total_punish_val
res = line_break_aux(len(input_splitted)-1)
print(line_break_table)
return [res,line_break_table]
def show_breaking_result(line_break_table, input_splitted):
breaking_chain_list = []
traveller = len(line_break_table) - 1
while(line_break_table[traveller]['prev'] != ''):
breaking_chain_list.append(line_break_table[traveller]['prev'])
traveller = line_break_table[traveller]['prev']
breaking_chain_list = [x for x in reversed(breaking_chain_list)] # 倒排
result_string = ""
head = -2
tail = 0
for i in breaking_chain_list:
tail = i
# 取 head 的下一個元素,以及 tail 後面的空白
fragment = input_splitted[head+2:tail+2]
fragment_combined_str = functools.reduce(lambda x, y: x + y,fragment,"")
fragment_combined_str += "\n"
result_string += fragment_combined_str
head = tail
result_string += functools.reduce(lambda x, y: x + y,input_splitted[head+2:],"")
print(result_string)
result = line_break(input_splitted, line_width)
show_breaking_result(result[1], input_splitted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment