Last active
August 29, 2015 14:17
-
-
Save gwarser/dee93d09b0fd97d0d02b to your computer and use it in GitHub Desktop.
Czyści pliki z tekstami piosenek (lyrics *.lrc, unsynced lyrics *.txt) ze śmieci
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! python3 | |
# -*- coding: UTF-8 -*- | |
# | |
# Script for cleaning up subtitle / lyrics files
#
# Usage:
# Drop a subtitle file onto the script icon, or
# run the script in the folder containing the subtitle files
# | |
import os, sys | |
import re | |
from pathlib import Path | |
from time import time | |
from difflib import unified_diff | |
class Replace(object):
    """Clean junk out of the text of a lyrics file.

    All work happens at construction time: a fixed sequence of regex passes
    runs over ``txt``. Afterwards ``self.txt`` holds the cleaned text and
    ``self.changes`` the number of substitutions that were applied.

    Constructing an instance may prompt on stdin (see ``asktoremove``) when a
    line looks like a link, e-mail address or IM handle.
    """

    # Metadata lines, removed without asking: ``[id:..]``-style tags,
    # ``title:``/``artist:``-style headers (optionally after a ``[..]``
    # timestamp), 8-character ids, ``[Instrumental]`` markers and encoding
    # declarations. The optional newlines on both sides are consumed too, so
    # each removed line collapses to the single '\n' used as replacement.
    _METADATA_RE = re.compile(
        r'''(?:\n)?^(?:
            \[(?=(id|ti|ar|al|by|la|lg|length|encoding)\s*:)[^\n]*|
            (?:^|\[[\d:.]+\])(?=(tracktitle|title|song|artist|album|lead|music|lyrics|lrc|by)\s*:)[^\n]*|
            (?:\ |ID:\ )\w{8}|
            \[?Instrumental\]?|
            (?:en)?coding:\s*(?:iso-8859|Windows|utf)-\d+
        )$(?:\n)?''', flags=re.I | re.M | re.X)

    # Whole lines containing something that looks like a link or contact
    # handle; letters may be separated by spaces/tabs (e.g. "w w w").
    _LINK_RE = re.compile(
        r'''(?:\n)?^
            [^\n]*
            (?:
                \.[^\S\n]*c[^\S\n]*o[^\S\n]*m\b|    #.com
                \.[^\S\n]*o[^\S\n]*r[^\S\n]*g\b|    #.org
                \.[^\S\n]*p[^\S\n]*l\b|             #.pl
                \bw[^\S\n]*w[^\S\n]*w\b|            #www
                \bh[^\S\n]*t[^\S\n]*t[^\S\n]*p\b|   #http
                @|                                  #emails
                (QQ|ICQ)[:\s]*\d+                   #IM
            )
            [^\n]*
            $(?:\n)?''', flags=re.I | re.M | re.X)

    # FULLWIDTH APOSTROPHE (U+FF07), either correctly decoded or as the three
    # characters its UTF-8 bytes (EF BC 87) become when read as Latin-1.
    _WIDE_APOSTROPHE_RE = re.compile(r'\uff07|\xef\xbc\x87')

    # Trailing blanks at the end of a line.
    _TRAILING_WS_RE = re.compile(
        r'''(?<!])      #not match after ']' (empty lines in .lrc)
            [^\S\n]+    #\s without \n
            $''', flags=re.M | re.X)

    # Superfluous blank lines. No re.M here: ^ and $ anchor the whole text.
    _EXTRA_NEWLINES_RE = re.compile(
        r'''^\n+|           #multiple \n on file start
            (?<=\n\n)\n|    #2+ new lines
            (?<=\n)\n+$     #\n on file end, not match if only one
        ''', flags=re.X)

    def __init__(self, txt):
        """Run every clean-up pass over ``txt``.

        Args:
            txt: full text of the file, as ``str``.
        """
        self.txt = txt
        # Must exist before the link pass: asktoremove() decrements it.
        self.changes = 0

        # Metadata lines (removed without asking).
        self.txt, num = self._METADATA_RE.subn('\n', self.txt)
        self.changes += num

        # Links (the user is asked about each one).
        self.txt, num = self._LINK_RE.subn(self.asktoremove, self.txt)
        self.changes += num

        # Fullwidth apostrophe -> ASCII apostrophe. The replacement has to be
        # str: a bytes replacement on str input raises TypeError on a match.
        self.txt, num = self._WIDE_APOSTROPHE_RE.subn("'", self.txt)
        self.changes += num

        # Whitespace at the end of lines.
        self.txt, num = self._TRAILING_WS_RE.subn('', self.txt)
        self.changes += num

        # Repeated newlines.
        self.txt, num = self._EXTRA_NEWLINES_RE.subn('', self.txt)
        self.changes += num

        # Terminating newline at the end of the file, if missing.
        if len(self.txt) and (self.txt[-1] != '\n'):
            self.txt += '\n'
            self.changes += 1

    def asktoremove(self, match):
        """Substitution callback: ask the user whether to drop a matched line.

        Prints the matched text and reads an answer from stdin.

        Args:
            match: the regex match object for the suspicious line.

        Returns:
            The matched text unchanged when the answer contains 'n' or 'N',
            otherwise a single newline, which removes the line.
        """
        print('\n\t' + match.group(0))
        rem = input('Remove [T/n]?:').strip()
        if 'N' in rem or 'n' in rem:
            # subn() counts this match regardless of what is returned, so
            # compensate in advance for the kept line.
            self.changes -= 1
            return match.group(0)
        else:
            return '\n'
def print_diff(afile, bfile):
    """Write a unified diff of two texts to standard error.

    Args:
        afile: the original text.
        bfile: the modified text.

    Both texts are split on '\n'; the diff uses three lines of context and
    each diff line is written followed by a newline.
    """
    before, after = afile.split('\n'), bfile.split('\n')
    delta = unified_diff(before, after, n=3, lineterm='')
    sys.stderr.writelines(entry + '\n' for entry in delta)
def process(flist):
    """Clean every file in ``flist`` in place.

    Args:
        flist: iterable of ``Path`` objects; entries that are not regular
            files are skipped.

    Each file is read as UTF-8, falling back to the default encoding of
    ``open()`` when that fails. Empty files and files that need no changes
    are left untouched. For a changed file the path, the number of changes
    and a diff are shown, and the file is rewritten as UTF-8.
    """
    for path in flist:
        if not path.is_file():
            continue

        try:
            with path.open('r', encoding='utf-8') as handle:
                original = handle.read()
        except UnicodeDecodeError:
            # Not valid UTF-8: retry with the system code page.
            with path.open('r') as handle:
                original = handle.read()

        if not original:
            continue

        cleaned = Replace(original)
        if not cleaned.changes:
            continue

        print(path)
        print('Changes:', cleaned.changes)
        print_diff(original, cleaned.txt)
        with path.open('w+', encoding='utf-8') as handle:
            handle.write(cleaned.txt)
        print('\n')
def getlastmtime():
    """Return the timestamp of the previous run and record the current one.

    The timestamp lives in a file named after the script path
    (``sys.argv[0]``) with ``.mtime`` appended, stored as integer seconds.

    Returns:
        The integer read from the stamp file, or 0 when the file cannot be
        read or does not contain an integer.
    """
    stamp_path = sys.argv[0] + '.mtime'
    try:
        with open(stamp_path, 'r') as f:
            mtime = int(f.read().strip())
    except (OSError, ValueError):
        # Missing/unreadable file or garbage content: behave like a first
        # run. Deliberately not a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        mtime = 0
    # Always overwrite the stamp with the current time for the next run.
    with open(stamp_path, 'w+') as f:
        f.write(str(int(time())))
    return mtime
def main():
    """Collect the files to clean and process them.

    With command-line arguments, only the given paths matching ``*.txt`` or
    ``*.lrc`` are processed. Without arguments, the current directory is
    scanned for files modified after the timestamp returned by
    ``getlastmtime()``.

    Returns early, without pausing, when there is nothing to process.
    """
    if len(sys.argv) > 1:
        pathlist = [Path(p) for p in sys.argv[1:]]
        txt = [x for x in pathlist if x.match('*.txt')]
        lrc = [x for x in pathlist if x.match('*.lrc')]
    else:
        mtime = getlastmtime()
        print('Scanning current directory tree!' + (' (new files only!)' if mtime else ''))
        cdir = Path('.')
        # NOTE(review): '*/*.ext' matches files exactly one directory level
        # below the current one, although the message above says "tree".
        # Confirm whether a recursive pattern ('**/*.ext') was intended.
        txt = [x for x in cdir.glob('*/*.txt') if x.stat().st_mtime > mtime]
        lrc = [x for x in cdir.glob('*/*.lrc') if x.stat().st_mtime > mtime]
    flist = txt + lrc
    if not flist:
        return
    process(flist)
    # 'pause' is a Windows shell builtin that keeps the console window open;
    # on other platforms the command does not exist, so skip it there.
    if os.name == 'nt':
        os.system('pause')
if __name__ == '__main__':
    # Script entry point: run main() and, on any unhandled exception, show
    # the traceback and wait for Enter before exiting.
    try:
        main()
    except Exception:
        from traceback import print_exc
        print("Unhandled exception:\n")
        print_exc()
        input('Press enter...')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment