Skip to content

Instantly share code, notes, and snippets.

@gwarser
Last active October 2, 2015 16:48
Show Gist options
  • Save gwarser/2279563 to your computer and use it in GitHub Desktop.
Save gwarser/2279563 to your computer and use it in GitHub Desktop.
Skrypt do poprawiania napisów (MicroDVD, MPlayer2).
#! python3
# -*- coding: UTF-8 -*-
#
# Subtitle clean-up script (MicroDVD {123}, MPlayer/MPL2 [123]).
# (Adapts subtitles downloaded from Napiprojekt for MPC-HC.)
#
# Renaming .txt to .sub is implemented in rename(), but the call in
# process() is currently commented out, so files keep their extension.
# Removes comment lines added by Napiprojekt, SubEdit etc.
# Removes lines containing links (each one is shown for confirmation first).
# In MicroDVD subtitles, converts slashes (italics) into {y:i} control codes.
# In MPlayer subtitles, converts control codes into slashes and removes the others (e.g. colour).
# Removes blank lines at the end of the file.
#
# Usage:
# Drop a subtitle file onto the script icon, or
# run the script in the folder with the subtitles (they must have the .txt extension).
#
import os, sys
import re
from fnmatch import filter as flt
from locale import getdefaultlocale, getpreferredencoding
# Encoding used to read BOM-less subtitle files and to write results back.
sysenc = getpreferredencoding()
# Encoding the OS uses for file names; reported for diagnostics only.
fsenc = sys.getfilesystemencoding()
# NOTE: the language suffix (`syslang`, derived from getdefaultlocale()) is
# intentionally not computed here; it was disabled in the original script.
print('System encoding:', sysenc)
print('File system encoding:', fsenc)
print('\n')
def askremove(match):
    """Show a matched line and ask the user whether to delete it.

    Used as the replacement callback for ``re.subn``.

    Args:
        match: the regex match object for the candidate line.

    Returns:
        The matched text unchanged when the answer starts with 'N' or 'n',
        otherwise an empty string (any other answer, including just Enter,
        means "remove").
    """
    found = match.group(0)
    print('', found)
    answer = input('Remove [T/n]?:').strip()
    keep = bool(answer) and answer[0] in 'Nn'
    return found if keep else ''
def replace(txt, confirm=None):
    """Clean up subtitle text and count the substitutions performed.

    Steps, in order:
      1. remove "movie info" / SubEdit / Napiprojekt advert lines;
      2. offer every line that looks like it contains a link to ``confirm``;
      3. strip trailing whitespace at the end of the text;
      4. MicroDVD (text starts with ``{n}{n}``): turn a leading ``/`` of a
         line segment into ``{y:i}``;
         MPL2 (text starts with ``[n][n]``): turn ``{y:i}`` into ``/`` and
         drop every other ``{x:...}`` control code;
      5. remove a ``/`` that directly precedes ``|``, a line break or the
         end of the text.

    Args:
        txt: whole subtitle file content as one string.
        confirm: callable taking the match object of a link line and
            returning its replacement ('' to delete, ``match.group(0)`` to
            keep). Defaults to the interactive ``askremove``.

    Returns:
        ``(new_text, count)`` where ``count`` is the total number of regex
        matches replaced, including link lines ``confirm`` chose to keep.
    """
    if confirm is None:
        confirm = askremove

    # Timing prefix such as {12}{34}, [12][34] or 00:01:02: .
    # The brackets inside the sets are escaped: an unescaped '[[' makes
    # ``re`` emit "FutureWarning: Possible nested set" on every compile.
    stamp = r'[\[{]*\d+[\]\[}{:]{1,2}\d+[\]}:](?:\d+:)?'
    chs = 0

    # 1. info / advert lines
    info_line = (stamp
                 + r'(?:movie info:)?.*?(?:subedit b\.\d{4}|napiprojekt)'
                 + r'[^\r\n]+[\r\n]+')
    txt, num = re.subn(info_line, '', txt, flags=re.I)
    chs += num

    # 2. lines with links.  WARNING: re.X strips whitespace from the
    # pattern, and '#hatak|' below is a disabled alternative.
    link_line = stamp + r'''
        [^\r\n]*
        (?:
            #hatak|
            \bw\s*w\s*w\b|
            \bh\s*t\s*t\s*p\b|
            \.\s*pl\b|
            \.\s*com\b|
            \.\s*org\b
        )
        [^\r\n]*[\r\n]+
        '''
    txt, num = re.subn(link_line, confirm, txt, flags=re.I | re.X)
    chs += num

    # 3. blank lines / whitespace at the end of the file
    txt, num = re.subn(r'\s+$', '', txt)
    chs += num

    # 4. italics markers, depending on the detected format
    if re.search(r'^\{\d+\}\{\d+\}', txt):  # MicroDVD: / -> {y:i}
        txt, num = re.subn(r'([}|])\s*((?:-\s*)?)/\s*', r'\1{y:i}\2', txt)
        chs += num
    elif re.search(r'^\[\d+\]\[\d+\]', txt):  # MPL2
        # {y:i} -> /
        txt, num = re.subn(r'\s*\{[yY]:i\}', '/', txt)
        chs += num
        # remove the remaining control codes (e.g. colour)
        txt, num = re.subn(r'\s*\{\w:[^}]+\}', '', txt)
        chs += num

    # 5. trailing '/'.
    # NOTE(review): the original nesting of this step was lost with the
    # file's indentation; it is applied to both formats here because a
    # closing slash is plain text in MicroDVD as well as in MPL2.
    txt, num = re.subn(r'\s*/\s*(\||\r|\n|$)', r'\1', txt)
    chs += num

    return txt, chs
def _default_lang_suffix():
    """Return the user's language as a file-name suffix ('.pl'), or ''."""
    try:
        # Imported here so rename() works even though the module-level
        # `syslang` definition is commented out.
        from locale import getdefaultlocale
        code = getdefaultlocale()[0]
    except (ImportError, ValueError):
        code = None
    return '.' + code[:2] if code else ''


def rename(path, lang=None):
    """Rename a subtitle file to ``<name><lang>.sub``.

    The extension of ``path`` is dropped, the language suffix is appended
    unless the name already ends with it, and ``.sub`` is added. If the
    target exists the user is asked before it is overwritten.

    Args:
        path: path of the file to rename.
        lang: language suffix including the dot (e.g. ``'.pl'``). When
            omitted it is derived from the default locale; an empty suffix
            is used if the locale cannot be determined.

    Returns:
        The new path on success, or ``path`` unchanged when nothing was
        renamed (already correctly named, user declined, target is not a
        regular file, or the OS refused).
    """
    if lang is None:
        lang = _default_lang_suffix()

    newp = os.path.splitext(path)[0]
    if not newp.endswith(lang):
        newp += lang
    newp += '.sub'

    # Already carries the target name: without this guard the "exists"
    # branch below would offer to delete the very file being renamed.
    same = (os.path.normcase(os.path.abspath(newp))
            == os.path.normcase(os.path.abspath(path)))
    if same:
        return path

    if os.path.exists(newp):
        print('Path exists:', newp)
        if not os.path.isfile(newp):
            print('Not file...')
            return path
        rem = input('Replace [T/n]?:').strip()
        if rem and rem[0] in 'Nn':
            return path

    try:
        # os.replace overwrites an existing target on every platform, so a
        # confirmed "Replace" really moves the source instead of only
        # deleting the old target.
        os.replace(path, newp)
    except OSError as e:
        print('{message} ({number})'.format(number=e.errno, message=e.strerror))
        return path
    return newp
def process(flist):
    """Clean every subtitle file in ``flist`` in place.

    For each path that is a regular file: read it (as ``utf-8-sig`` when it
    starts with a UTF-8 BOM, otherwise with the system encoding ``sysenc``),
    skip it when empty or when no timing prefix is found, run ``replace()``
    on the content and, if anything changed, write the result back encoded
    with ``sysenc``.

    Args:
        flist: iterable of file paths.
    """
    # Timing prefix such as {12}{34}, [12][34] or 00:01:02: .  Written as a
    # raw string with escaped brackets: the original literal relied on the
    # invalid escape '\d' in a normal string and its leading '[[' triggered
    # "FutureWarning: Possible nested set".
    stamp = re.compile(r'[\[{]*\d+[\]\[}{:]{1,2}\d+[\]}:](?:\d+:)?')

    for sub in flist:
        if not os.path.isfile(sub):
            continue
        print(sub)

        with open(sub, 'rb') as bomtest:
            bom = bomtest.read(3)
        if bom == b'\xEF\xBB\xBF':  # Napiprojekt only uses utf-8-sig
            print('Czytany jako utf-8-sig')
            encoding = 'utf-8-sig'
        else:
            encoding = sysenc
        with open(sub, 'r', encoding=encoding, errors='replace') as file:
            lines = file.read()

        if not lines:
            print('Empty file, skipped...')
            continue

        if stamp.search(lines):
            lines, changes = replace(lines)
            if changes:
                print('Changes:', changes)
                # xy-VobSub does not detect text subtitles in .sub files,
                # so renaming is skipped for now:
                #sub = rename(sub)
                # NOTE(review): indentation was lost in this copy of the
                # file; the write is assumed to happen only when something
                # changed -- confirm.
                with open(sub, 'w', encoding=sysenc, errors='replace') as file:
                    file.write(lines)
        else:
            print('Not subtitle, skipped...')
        print('\n')
def main():
    """Collect the subtitle files to handle and pass them to ``process()``.

    With command-line arguments, the normalised paths matching ``*.txt``
    are taken first, followed by those matching ``*.sub``. Without
    arguments, the ``*.txt`` entries of the current working directory are
    used. Nothing happens when the resulting list is empty; otherwise the
    console is paused afterwards via the shell's ``pause`` command.
    """
    args = sys.argv[1:]
    if args:
        paths = list(map(os.path.normpath, args))
        flist = flt(paths, '*.txt') + flt(paths, '*.sub')
    else:
        flist = flt(os.listdir(os.getcwd()), '*.txt')

    if flist:
        process(flist)
        os.system('pause')
if __name__ == '__main__':
    try:
        main()
    except Exception:
        # Top-level boundary: print the traceback and wait for the user so
        # a console window opened by drag-and-drop does not vanish at once.
        from traceback import print_exc
        print("Unhandled exception:\n")
        print_exc()
        input('Press any key...')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment