Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@gwarser
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gwarser/dee93d09b0fd97d0d02b to your computer and use it in GitHub Desktop.
Save gwarser/dee93d09b0fd97d0d02b to your computer and use it in GitHub Desktop.
Czyści pliki z tekstami piosenek (lyrics *.lrc, unsynced lyrics *.txt) ze śmieci
#! python3
# -*- coding: UTF-8 -*-
#
# Skrypt do poprawiania napisów
#
# Sposób użycia:
# Upuścić plik z napisami na ikonkę skryptu lub
# odpalić skrypt w folderze z napisami
#
import os, sys
import re
from pathlib import Path
from time import time
from difflib import unified_diff
class Replace(object):
    """Clean junk out of the text of a lyrics file (*.lrc / unsynced *.txt).

    All the work happens in the constructor. Afterwards:

    * ``txt`` holds the cleaned text,
    * ``changes`` holds the number of substitutions that were applied
      (0 means the text was left untouched).

    The link-removal step is interactive: for every matching line the
    user is asked on stdin whether it should be removed.
    """

    def __init__(self, txt):
        """Run every clean-up pass over *txt*, in order."""
        self.txt = txt
        self.changes = 0

        # Metadata / ID tag lines (removed without asking).
        self.txt, num = re.subn(
            r'''(?:\n)?^(?:
                \[(?=(id|ti|ar|al|by|la|lg|length|encoding)\s*:)[^\n]*|
                (?:^|\[[\d:.]+\])(?=(tracktitle|title|song|artist|album|lead|music|lyrics|lrc|by)\s*:)[^\n]*|
                (?:\ |ID:\ )\w{8}|
                \[?Instrumental\]?|
                (?:en)?coding:\s*(?:iso-8859|Windows|utf)-\d+
            )$(?:\n)?''', '\n', self.txt, flags=re.I|re.M|re.X)
        self.changes += num

        # Lines containing links, e-mail addresses or IM numbers
        # (asks the user whether each one should be removed).
        self.txt, num = re.subn(
            r'''(?:\n)?^
                [^\n]*
                (?:
                \.[^\S\n]*c[^\S\n]*o[^\S\n]*m\b| #.com
                \.[^\S\n]*o[^\S\n]*r[^\S\n]*g\b| #.org
                \.[^\S\n]*p[^\S\n]*l\b| #.pl
                \bw[^\S\n]*w[^\S\n]*w\b| #www
                \bh[^\S\n]*t[^\S\n]*t[^\S\n]*p\b| #http
                @| #emails
                (QQ|ICQ)[:\s]*\d+ #IM
                )
                [^\n]*
            $(?:\n)?''', self.asktoremove, self.txt, flags=re.I|re.M|re.X)
        self.changes += num

        # Fullwidth apostrophe -> ASCII apostrophe.
        # U+FF07 is the character itself; the second alternative is the
        # three characters its UTF-8 bytes (EF BC 87) turn into when they
        # are decoded one byte per character.
        # The replacement must be a str: a bytes replacement raises
        # TypeError as soon as the pattern matches a str subject.
        self.txt, num = re.subn(r'\uff07|\xef\xbc\x87', "'", self.txt)
        self.changes += num

        # Whitespace at the end of a line.
        self.txt, num = re.subn(
            r'''(?<!]) #not match after ']' (empty lines in .lrc)
                [^\S\n]+ #\s without \n
            $''', '', self.txt, flags=re.M|re.X)
        self.changes += num

        # Runs of blank lines.
        self.txt, num = re.subn(
            r'''^\n+| #multiple \n on file start
                (?<=\n\n)\n| #2+ new lines
                (?<=\n)\n+$ #\n on file end, not match if only one
            ''', '', self.txt, flags=re.X)
        self.changes += num

        # Make sure non-empty text ends with a newline.
        if len(self.txt) and (self.txt[-1] != '\n'):
            self.txt += '\n'
            self.changes += 1

    def asktoremove(self, match):
        """Replacement callback for the link pass: ask before removing.

        Prints the matched line and reads an answer from stdin. An answer
        containing ``n``/``N`` keeps the line; anything else (including an
        empty answer) replaces the match with a single newline.

        Returns the replacement string for ``re.subn``.
        """
        print('\n\t' + match.group(0))
        rem = input('Remove [T/n]?:').strip()
        if 'N' in rem or 'n' in rem:
            # re.subn still counts this match, so cancel it out up front.
            self.changes -= 1
            return match.group(0)
        else:
            return '\n'
def print_diff(afile, bfile):
    """Write a unified diff between two texts to standard error.

    afile -- the "before" text, as one string
    bfile -- the "after" text, as one string

    Both texts are split on '\\n'; the diff carries three lines of
    context. Nothing is printed when the texts are equal.
    """
    before, after = afile.split('\n'), bfile.split('\n')
    diff_lines = unified_diff(before, after, n=3, lineterm='')
    for entry in diff_lines:
        print(entry, file=sys.stderr)
def process(flist):
    """Clean every file in *flist* in place.

    flist -- iterable of pathlib.Path objects; entries that are not
             regular files are skipped.

    Each file is read as UTF-8, falling back to open()'s default encoding
    when that fails to decode. Empty files are skipped. When Replace
    reports at least one change, the path, the change count and a diff
    are printed and the file is rewritten as UTF-8.
    """
    for path in flist:
        if not path.is_file():
            continue

        try:
            original = path.read_text(encoding='utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: retry with the default (system) encoding.
            original = path.read_text()

        if not original:
            continue

        cleaned = Replace(original)
        if not cleaned.changes:
            continue

        print(path)
        print('Changes:', cleaned.changes)
        print_diff(original, cleaned.txt)
        with path.open('w+', encoding='utf-8') as out:
            out.write(cleaned.txt)
        print('\n')
def getlastmtime():
    """Return the timestamp of the previous run and record the current one.

    The timestamp lives in a file named ``<sys.argv[0]>.mtime`` and is
    stored as whole seconds. The file is overwritten with the current
    time on every call.

    Returns the previously stored integer, or 0 when the file is missing,
    unreadable or does not contain an integer.
    """
    stamp_path = sys.argv[0] + '.mtime'
    try:
        with open(stamp_path, 'r') as f:
            mtime = int(f.read().strip())
    except (OSError, ValueError):
        # No usable stamp (first run, unreadable file, garbage content).
        # Deliberately not a bare except, so Ctrl-C and SystemExit
        # still propagate.
        mtime = 0
    with open(stamp_path, 'w+') as f:
        f.write(str(int(time())))
    return mtime
def main():
    """Collect the lyrics files to clean and hand them to process().

    With command-line arguments: each argument is treated as a path and
    kept when it matches '*.txt' or '*.lrc'.

    Without arguments: the current directory is searched with the glob
    patterns '*/*.txt' and '*/*.lrc', keeping only files modified after
    the timestamp returned by getlastmtime().

    If anything was selected, the files are processed and the console
    'pause' command is run afterwards.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        given = [Path(arg) for arg in cli_args]
        txt = [p for p in given if p.match('*.txt')]
        lrc = [p for p in given if p.match('*.lrc')]
    else:
        mtime = getlastmtime()
        note = ' (new files only!)' if mtime else ''
        print('Scanning current directory tree!' + note)
        cdir = Path('.')
        txt = [p for p in cdir.glob('*/*.txt') if p.stat().st_mtime > mtime]
        lrc = [p for p in cdir.glob('*/*.lrc') if p.stat().st_mtime > mtime]

    flist = txt + lrc
    if flist:
        process(flist)
        os.system('pause')
# Script entry point: run main() and, on any unhandled exception, show the
# traceback and wait for Enter before exiting.
if __name__ == '__main__':
    try:
        main()
    except Exception:
        from traceback import print_exc
        print("Unhandled exception:\n")
        print_exc()
        input('Press enter...')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment