# wareya/kssize.py

## kssize.py

# Modified to spit out main script content in utf-8
# use "Phiber's Kirikiri tool. File:Kikiriki.rar" from https://tlwiki.org/?title=Tools#KiriKiri2.2FKAG3 to extract script files from realta nua's vanilla .exe files
# copy said scripts all into the same folder (overwrite or don't copy files with the same name, do not rename) and delete non-script ones (menu, macro, subroutine, etc)

# krkr line counter script
#
# How to run:
#
#    (1) Organize game scripts into directories one level below where this script resides.
#    (2) Run the script, size data is printed to standard out
#
#    Script output is in the following format:
#        <script_name> <line_count> <size_in_bytes>
#
#    Total sizes for script categories (based on directories) and grand total are also
#    listed and are prefixed by * and ** respectively.
#
#    Line counting is a bit tricky with krkr scripts because it is difficult to define
#    what exactly is a line from the scripts alone. This script attempts to deal with
#    this by having different methods of counting (see COUNT_MODE below).

import glob
import os
import re
import sys


# count mode (bit flags, may be combined)
#     1: count each non-empty text line in the script file
#     2: count one line whenever an empty (non-text) line follows a text line
#     4: count script lines that contain [l]
#     8: count script lines that contain [r]

# Bit flags (see the "count mode" list above) choosing how txtsize() counts
# lines; each flag is tested on its own, so several can be OR-ed together.
COUNT_MODE = 1

# One bracketed tag containing no nested brackets. Only referenced by a
# commented-out line in remove_formatting().
fmt = re.compile(r'\[[^\[\]]*\]')
# [ruby ...] tag variants; remove_formatting() rewrites each one as 《...》.
#   ruby3a: [ruby char=X text=Y]     groups 2 and 4 capture X and Y
#   ruby3b: [ruby char=X text="Y"]   same, with the quotes outside group 4
#   ruby2:  [ruby text=Y char=X]     groups 2 and 4 capture Y and X
#   ruby1:  [ruby text=Y] or [ruby text=Y ...]   group 2 captures Y
ruby3a = re.compile(r'(\[ruby char=)([^\]]*?)( text=)([^\]]*?)(\])')
ruby3b = re.compile(r'(\[ruby char=)([^\]]*?)( text=")([^\]]*?)("\])')
ruby2 = re.compile(r'(\[ruby text=)([^\]]*?)( char=)(.*?)(\])')
ruby1 = re.compile(r'(\[ruby text=)([^ ]*?)(( .*?\])|(\]))')

# Tags that remove_formatting() deletes outright, with or without attributes.
#unsure: retainhaze, stopdash
etc = re.compile(r'\[(wsay|wacky|se|sestop|seloop|wait|chgfg|movefg|move|flicker|shock|fadein|p|transex|quake|dash|noise|say|stopnoise|wm|wdash|rep|stopdash|clfg|playstop|retainhaze|ld)(| .*?)\]')

# Line-type prefixes, anchored at the start of the string:
# '@' command, ';' comment, '*' label.
cmd = re.compile(r'^@.*')
cmt = re.compile(r'^\;.*')
lbl = re.compile(r'^\*.*')
# A 【...】 span (greedy: from the first 【 to the last 】 in the string);
# presumably a speaker-name marker, judging by the variable name.
spk = re.compile(r'【.*】')

def is_command(s):
	"""Return the `cmd` match object when `s` starts with '@', otherwise None."""
	found = cmd.match(s)
	return found

def is_comment(s):
	"""Return the `cmt` match object when `s` starts with ';', otherwise None."""
	found = cmt.match(s)
	return found

def is_label(s):
	"""Return the `lbl` match object when `s` starts with '*', otherwise None."""
	found = lbl.match(s)
	return found

def is_iscript(s):
	"""Return True when `s` starts with '@iscript', False otherwise.

	A string that starts with '@iscript' necessarily starts with '@', so a
	separate command check adds nothing. Using startswith() also makes the
	result a real bool for every input (never None) and stops comparing at
	the first mismatching character instead of searching the whole string.
	"""
	return s.startswith('@iscript')

def is_endscript(s):
	"""Return True when `s` starts with '@endscript', False otherwise.

	A string that starts with '@endscript' necessarily starts with '@', so a
	separate command check adds nothing. Using startswith() also makes the
	result a real bool for every input (never None) and stops comparing at
	the first mismatching character instead of searching the whole string.
	"""
	return s.startswith('@endscript')

def is_text(s,in_script):
	"""Return True when `s` is a plain text line outside an iscript section.

	A line is text when `in_script` is falsy and the line is neither a
	command, a label nor a comment (checked in that order).
	"""
	if in_script:
		return False
	if is_command(s) or is_label(s) or is_comment(s):
		return False
	return True

def remove_formatting(s):
	"""Strip script markup from one line and return the remaining text.

	Processing order:
	  1. named glyph tags are replaced by the characters they stand for;
	  2. [block ...] / [line...] drawing tags become runs of block or bar
	     characters of the stated length;
	  3. line-break tags become a newline and [nolr] is dropped;
	  4. [ruby ...] tags become 《...》 annotations;
	  5. every tag listed in `etc` is deleted;
	  6. a leading @command, ;comment or *label and any 【...】 span are
	     deleted.
	Tags not covered above are left in the returned string.
	"""
	glyphs = (
		#chuuni
		#these are misspelled in FSN in ways that literally break the greek writing system, so the "intended" name is here
		("[atlas]", "Ατλας"),
		("[margos]", "Μαρδοξ"),
		("[aero]", "Αερο"),
		("[troya]", "Tροψα"),
		("[keraino]", "Κεραινο"),
		#out of encoding
		("[szlig]", "ß"),
		("[uuml]", "ü"),
		("[auml]", "ä"),
		("[heart]", "❤"),
		("[XAuml]", "ö"), #Ö in FSN but it should be ö
	)
	for tag, text in glyphs:
		s = s.replace(tag, text)

	#drawing
	# Only the lengths listed here are expanded; any other length is left
	# untouched in the text.
	for n in (12, 9, 3, 2):
		s = s.replace("[block len=%d]" % n, "█" * n)
	for n in range(25, 0, -1):
		s = s.replace("[line%d]" % n, "―" * n)
	for n in (8, 5, 4, 3):
		s = s.replace("[line len=%d]" % n, "―" * n)

	#newline type control
	for tag in ("[l]", "[r]", "[lr]", "[rf]", "[br]"):
		s = s.replace(tag, "\n")
	s = s.replace("[nolr]", "")

	#ruby
	# The quoted text="..." form has to be rewritten first: ruby3a matches
	# that form as well and would carry the quotation marks into the output,
	# leaving ruby3b with nothing to match.
	s = ruby3b.sub(r"《\4:\2》", s)
	s = ruby3a.sub(r"《\4:\2》", s)
	s = ruby2.sub(r"《\2:\4》", s)
	s = ruby1.sub(r"《\2》", s)

	#other formatting
	s = etc.sub(r"", s)
	# NOTE: the catch-all `fmt` pattern is deliberately not applied here, so
	# tags that are not handled above stay in the text.

	# Drop a leading command / comment / label and any 【...】 span.
	for pattern in (cmd, cmt, lbl, spk):
		s = pattern.sub("", s)
	return s

# Shared UTF-8 output file, opened (and truncated, because of mode "w+") as
# soon as this statement runs. txtsize() writes each counted text line to it
# when COUNT_MODE has bit 0x1 set. Nothing in the visible code closes it
# explicitly.
outfile = open("out.txt", "w+", encoding="utf-8")

def txtsize(fi):
	"""Count the text lines and text bytes of one script.

	fi: an iterable of decoded script lines (normally an open text file).

	Returns a (lines, size) tuple. `lines` is the number of lines counted
	under the rules selected by COUNT_MODE. `size` is the byte length of the
	text left after remove_formatting(), measured in cp932, or in UTF-8 for
	a line that cp932 cannot encode.

	Lines between an '@iscript' line and the matching '@endscript' line are
	skipped. When COUNT_MODE has bit 0x1 set, every counted text line is
	also written to the module-level `outfile`.
	"""
	in_script = False
	lines = size = 0
	# line_t is bound up front so that prevline_t can always be assigned.
	line_t = prevline_t = ''
	for line in fi:
		line_s = line.strip()
		# Leave script mode before processing, so the @endscript line itself
		# goes through the normal path below.
		if is_endscript(line_s):
			in_script = False
		if not in_script:
			line_t = remove_formatting(line_s).strip()
			if line_t:
				try:
					size += len(line_t.encode('cp932'))
				except UnicodeEncodeError:
					# Some replacement glyphs are outside cp932; measure the
					# whole line in UTF-8 instead.
					size += len(line_t.encode('utf-8'))
			if COUNT_MODE & 0x1:
				if line_t:
					lines += 1
					outfile.write("%s\n" % (line_t))
			if COUNT_MODE & 0x2:
				# An empty line directly after a text line closes a text run.
				if not line_t and prevline_t:
					lines += 1
			if COUNT_MODE & 0x4:
				# Counted once per line, however many [l] tags it contains.
				if '[l]' in line_s:
					lines += 1
			if COUNT_MODE & 0x8:
				# Counted once per line, however many [r] tags it contains.
				if '[r]' in line_s:
					lines += 1
		# Enter script mode after processing, so the @iscript line itself
		# went through the normal path above.
		if is_iscript(line_s):
			in_script = True
		prevline_t = line_t
	return lines,size

def get_encoding(fname):
	"""Choose the codec for a script file from its first two bytes.

	fname: path of the file to inspect.

	Returns 'utf-16' when the file starts with a UTF-16 byte-order mark
	(FF FE or FE FF) and 'cp932' for everything else, including files
	shorter than two bytes.
	"""
	# The context manager closes the handle again; only two bytes are needed.
	with open(fname, 'rb') as probe:
		head = probe.read(2)
	if head in (b'\xFF\xFE', b'\xFE\xFF'):
		return 'utf-16'
	return 'cp932'

if __name__ == '__main__':
	# Report line count and byte size for every *.ks file in each directory
	# below the current one, with a '*' subtotal per directory and a '**'
	# grand total at the end.
	glines = gsize = 0
	for dirpath, dirnames, filenames in os.walk('.'):
		# Sorting in place makes os.walk visit sub-directories in a stable order.
		dirnames.sort()
		# Scripts sitting next to this file are not part of any category.
		if dirpath == '.':
			continue
		tlines = tsize = 0
		# Escape the directory part so names containing [, ] , * or ? are
		# taken literally; sort so the report and out.txt are reproducible.
		pattern = os.path.join(glob.escape(dirpath),'*.ks')
		for path in sorted(glob.glob(pattern)):
			# Close each script as soon as it has been counted.
			with open(path,'r',encoding=get_encoding(path)) as fi:
				lines,size = txtsize(fi)
			print('%s %6d %8d' % (os.path.basename(path).ljust(32),lines,size))
			tlines += lines
			tsize += size
		print('*%s %6d %8d' % (os.path.split(dirpath)[1].ljust(31),tlines,tsize))
		print('')
		glines += tlines
		gsize += tsize
	print('**%s %6d %8d' % ('total'.ljust(30),glines,gsize))

	# Modified to spit out main script content in utf-8
	# use "Phiber's Kirikiri tool. File:Kikiriki.rar" from https://tlwiki.org/?title=Tools#KiriKiri2.2FKAG3 to extract script files from realta nua's vanilla .exe files
	# copy said scripts all into the same folder (overwrite or don't copy files with the same name, do not rename) and delete non-script ones (menu, macro, subroutine, etc)

	# krkr line counter script
	#
	# How to run:
	#
	# (1) Organize game scripts into directories one level below where this script resides.
	# (2) Run the script, size data is printed to standard out
	#
	# Script output is in the following format:
	# <script_name> <line_count> <size_in_bytes>
	#
	# Total sizes for script categories (based on directories) and grand total are also
	# listed and are prefixed by * and ** respectively.
	#
	# Line counting is a bit tricky with krkr scripts because it is difficult to define
	# what exactly is a line from the scripts alone. This script attempts to deal with
	# this by having different methods of counting (see COUNT_MODE below).

	import glob
	import os
	import re
	import sys


	# count mode (bit flags, may be combined)
	# 1: count each non-empty text line in the script file
	# 2: count one line whenever an empty (non-text) line follows a text line
	# 4: count script lines that contain [l]
	# 8: count script lines that contain [r]

	# Bit flags (see the "count mode" list above) choosing how txtsize() counts
	# lines; each flag is tested on its own, so several can be OR-ed together.
	COUNT_MODE = 1

	# One bracketed tag containing no nested brackets. Only referenced by a
	# commented-out line in remove_formatting().
	fmt = re.compile(r'\[[^\[\]]*\]')
	# [ruby ...] tag variants; remove_formatting() rewrites each one as 《...》.
	# The lazy `*?` quantifiers are required: with a bare `?` each value could
	# be at most one character long.
	#   ruby3a: [ruby char=X text=Y]     groups 2 and 4 capture X and Y
	#   ruby3b: [ruby char=X text="Y"]   same, with the quotes outside group 4
	#   ruby2:  [ruby text=Y char=X]     groups 2 and 4 capture Y and X
	#   ruby1:  [ruby text=Y] or [ruby text=Y ...]   group 2 captures Y
	ruby3a = re.compile(r'(\[ruby char=)([^\]]*?)( text=)([^\]]*?)(\])')
	ruby3b = re.compile(r'(\[ruby char=)([^\]]*?)( text=")([^\]]*?)("\])')
	ruby2 = re.compile(r'(\[ruby text=)([^\]]*?)( char=)(.*?)(\])')
	ruby1 = re.compile(r'(\[ruby text=)([^ ]*?)(( .*?\])|(\]))')

	# Tags that remove_formatting() deletes outright, with or without
	# attributes. The `|` separators must be unescaped to act as alternation.
	#unsure: retainhaze, stopdash
	etc = re.compile(r'\[(wsay|wacky|se|sestop|seloop|wait|chgfg|movefg|move|flicker|shock|fadein|p|transex|quake|dash|noise|say|stopnoise|wm|wdash|rep|stopdash|clfg|playstop|retainhaze|ld)(| .*?)\]')

	# Line-type prefixes, anchored at the start of the string:
	# '@' command, ';' comment, '*' label.
	cmd = re.compile(r'^@.*')
	cmt = re.compile(r'^\;.*')
	lbl = re.compile(r'^\*.*')
	# A 【...】 span (greedy: from the first 【 to the last 】 in the string);
	# presumably a speaker-name marker, judging by the variable name.
	spk = re.compile(r'【.*】')

	def is_command(s):
		"""Return the `cmd` match object when `s` starts with '@', otherwise None."""
		return cmd.match(s)

	def is_comment(s):
		"""Return the `cmt` match object when `s` starts with ';', otherwise None."""
		return cmt.match(s)

	def is_label(s):
		"""Return the `lbl` match object when `s` matches the label pattern, otherwise None."""
		return lbl.match(s)

	def is_iscript(s):
		"""Return True when `s` starts with '@iscript', False otherwise.

		A string that starts with '@iscript' necessarily starts with '@', so
		a separate command check adds nothing, and startswith() always yields
		a real bool.
		"""
		return s.startswith('@iscript')

	def is_endscript(s):
		"""Return True when `s` starts with '@endscript', False otherwise.

		A string that starts with '@endscript' necessarily starts with '@',
		so a separate command check adds nothing, and startswith() always
		yields a real bool.
		"""
		return s.startswith('@endscript')

	def is_text(s,in_script):
		"""Return True when `s` is a plain text line outside an iscript section.

		A line is text when `in_script` is falsy and the line is neither a
		command, a label nor a comment.
		"""
		return not in_script and not (is_command(s) or is_label(s) or is_comment(s))

	def remove_formatting(s):
		"""Strip script markup from one line and return the remaining text.

		Processing order:
		  1. named glyph tags are replaced by the characters they stand for;
		  2. [block ...] / [line...] drawing tags become runs of block or bar
		     characters of the stated length;
		  3. line-break tags become a newline and [nolr] is dropped;
		  4. [ruby ...] tags become 《...》 annotations;
		  5. every tag listed in `etc` is deleted;
		  6. a leading @command, ;comment or *label and any 【...】 span are
		     deleted.
		Tags not covered above are left in the returned string.
		"""
		glyphs = (
			#chuuni
			#these are misspelled in FSN in ways that literally break the greek writing system, so the "intended" name is here
			("[atlas]", "Ατλας"),
			("[margos]", "Μαρδοξ"),
			("[aero]", "Αερο"),
			("[troya]", "Tροψα"),
			("[keraino]", "Κεραινο"),
			#out of encoding
			("[szlig]", "ß"),
			("[uuml]", "ü"),
			("[auml]", "ä"),
			("[heart]", "❤"),
			("[XAuml]", "ö"), #Ö in FSN but it should be ö
		)
		for tag, text in glyphs:
			s = s.replace(tag, text)

		#drawing
		# Only the lengths listed here are expanded; any other length is left
		# untouched in the text.
		for n in (12, 9, 3, 2):
			s = s.replace("[block len=%d]" % n, "█" * n)
		for n in range(25, 0, -1):
			s = s.replace("[line%d]" % n, "―" * n)
		for n in (8, 5, 4, 3):
			s = s.replace("[line len=%d]" % n, "―" * n)

		#newline type control
		for tag in ("[l]", "[r]", "[lr]", "[rf]", "[br]"):
			s = s.replace(tag, "\n")
		s = s.replace("[nolr]", "")

		#ruby
		# The quoted text="..." form has to be rewritten first: ruby3a matches
		# that form as well and would carry the quotation marks into the
		# output, leaving ruby3b with nothing to match.
		s = ruby3b.sub(r"《\4:\2》", s)
		s = ruby3a.sub(r"《\4:\2》", s)
		s = ruby2.sub(r"《\2:\4》", s)
		s = ruby1.sub(r"《\2》", s)

		#other formatting
		s = etc.sub(r"", s)
		# NOTE: the catch-all `fmt` pattern is deliberately not applied here,
		# so tags that are not handled above stay in the text.

		# Drop a leading command / comment / label and any 【...】 span.
		for pattern in (cmd, cmt, lbl, spk):
			s = pattern.sub("", s)
		return s

	# Shared UTF-8 output file, opened (and truncated, because of mode "w+") as
	# soon as this statement runs. txtsize() writes each counted text line to
	# it when COUNT_MODE has bit 0x1 set. Nothing in the visible code closes it
	# explicitly.
	outfile = open("out.txt", "w+", encoding="utf-8")

	def txtsize(fi):
		"""Count the text lines and text bytes of one script.

		fi: an iterable of decoded script lines (normally an open text file).

		Returns a (lines, size) tuple. `lines` is the number of lines counted
		under the rules selected by COUNT_MODE. `size` is the byte length of
		the text left after remove_formatting(), measured in cp932, or in
		UTF-8 for a line that cp932 cannot encode.

		Lines between an '@iscript' line and the matching '@endscript' line
		are skipped. When COUNT_MODE has bit 0x1 set, every counted text line
		is also written to `outfile`.
		"""
		in_script = False
		lines = size = 0
		# line_t is bound up front so that prevline_t can always be assigned.
		line_t = prevline_t = ''
		for line in fi:
			line_s = line.strip()
			# Leave script mode before processing, so the @endscript line
			# itself goes through the normal path below.
			if is_endscript(line_s):
				in_script = False
			if not in_script:
				line_t = remove_formatting(line_s).strip()
				if line_t:
					try:
						size += len(line_t.encode('cp932'))
					except UnicodeEncodeError:
						# Some replacement glyphs are outside cp932; measure
						# the whole line in UTF-8 instead.
						size += len(line_t.encode('utf-8'))
				if COUNT_MODE & 0x1:
					if line_t:
						lines += 1
						outfile.write("%s\n" % (line_t))
				if COUNT_MODE & 0x2:
					# An empty line directly after a text line closes a text run.
					if not line_t and prevline_t:
						lines += 1
				if COUNT_MODE & 0x4:
					# Counted once per line, however many [l] tags it contains.
					if '[l]' in line_s:
						lines += 1
				if COUNT_MODE & 0x8:
					# Counted once per line, however many [r] tags it contains.
					if '[r]' in line_s:
						lines += 1
			# Enter script mode after processing, so the @iscript line itself
			# went through the normal path above.
			if is_iscript(line_s):
				in_script = True
			prevline_t = line_t
		return lines,size

	def get_encoding(fname):
		"""Choose the codec for a script file from its first two bytes.

		fname: path of the file to inspect.

		Returns 'utf-16' when the file starts with a UTF-16 byte-order mark
		(FF FE or FE FF) and 'cp932' for everything else, including files
		shorter than two bytes.
		"""
		# The context manager closes the handle again; only two bytes are needed.
		with open(fname, 'rb') as probe:
			head = probe.read(2)
		if head in (b'\xFF\xFE', b'\xFE\xFF'):
			return 'utf-16'
		return 'cp932'

	if __name__ == '__main__':
		# Report line count and byte size for every *.ks file in each
		# directory below the current one, with a '*' subtotal per directory
		# and a '**' grand total at the end.
		glines = gsize = 0
		for dirpath, dirnames, filenames in os.walk('.'):
			# Sorting in place makes os.walk visit sub-directories in a stable order.
			dirnames.sort()
			# Scripts sitting next to this file are not part of any category.
			if dirpath == '.':
				continue
			tlines = tsize = 0
			# Escape the directory part so names containing [, ], * or ? are
			# taken literally; sort so the report and out.txt are reproducible.
			pattern = os.path.join(glob.escape(dirpath),'*.ks')
			for path in sorted(glob.glob(pattern)):
				# Close each script as soon as it has been counted.
				with open(path,'r',encoding=get_encoding(path)) as fi:
					lines,size = txtsize(fi)
				print('%s %6d %8d' % (os.path.basename(path).ljust(32),lines,size))
				tlines += lines
				tsize += size
			print('*%s %6d %8d' % (os.path.split(dirpath)[1].ljust(31),tlines,tsize))
			print('')
			glines += tlines
			gsize += tsize
		print('**%s %6d %8d' % ('total'.ljust(30),glines,gsize))