Skip to content

Instantly share code, notes, and snippets.

@pochemuto
Created March 21, 2018 10:10
Show Gist options
  • Save pochemuto/32ea6245a10fee5956b2d9e9125d7068 to your computer and use it in GitHub Desktop.
Save pochemuto/32ea6245a10fee5956b2d9e9125d7068 to your computer and use it in GitHub Desktop.
Clean up YPP podcast archive
#coding: utf
from mutagen.mp3 import MP3
from mutagen.easyid3 import EasyID3
from mutagen.id3 import ID3, TIT2, TIT3, TDES, TDRL, TimeStampTextFrame, ID3TimeStamp
from mutagen.id3._util import ID3NoHeaderError
from os import path
from datetime import datetime
import os
import re
import shutil
from datetime import timedelta
class NotFound(Exception):
    """Raised when a file the script expects cannot be located."""

    def __init__(self, message):
        # ``super(Exception, self)`` started the MRO lookup *after* Exception,
        # bypassing it; the zero-argument form starts from this class.
        super().__init__(message)
class Notes:
    """Title, date, text and episode number describing one podcast entry."""

    # "<day> <month name> <year> <hour>:<minute>", e.g. "21 марта 2018 10:10".
    date_pattern = re.compile(r'(\d+) (\w+) (\d+) (\d+):(\d+)')
    # Lower-case month names as they appear in dates; index + 1 is the month number.
    months = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']

    def __init__(self):
        self.title = ''
        # Empty string until date_from_str() replaces it with a datetime.
        self.date = ''
        self.text = ''
        self.number = 0

    def date_from_str(self, strdate):
        """Parse ``strdate`` and store the resulting datetime in ``self.date``.

        Args:
            strdate: text starting with "<day> <month name> <year> <hh>:<mm>";
                the month name is matched case-insensitively against ``months``.

        Raises:
            ValueError: if ``strdate`` does not match ``date_pattern``, names
                an unknown month, or holds out-of-range date components.
        """
        cls = type(self)
        m = cls.date_pattern.match(strdate)
        if m is None:
            # Previously this surfaced as an AttributeError on ``None``.
            raise ValueError('unrecognized date: {!r}'.format(strdate))
        day, month_str, year, hour, minute = m.groups()
        try:
            month = cls.months.index(month_str.lower()) + 1
        except ValueError:
            raise ValueError('unknown month {!r} in date {!r}'.format(month_str, strdate)) from None
        self.date = datetime(int(year), month, int(day), int(hour), int(minute))

    def __repr__(self):
        return '{0} [{1}]: {2}'.format(self.title, self.date, self.text)
# Optional show-name prefix, optional "#"/"№", then the episode number.
# "Встерча" is kept as written; presumably it matches a misspelled title.
_NUMBER_IN_TITLE = re.compile(
    r'(Встреча|ЯПП|YPP|Выпуск|Пьянка|Подкаст|Шоу|Budam|Встерча|Янки после пьянки|ЯПП - После РТ|Будам)?\s*[#№]?([0-9]+)',
    flags=re.I,
)
# Episode number embedded in the mp3 file path, e.g. "ypp123".
_NUMBER_IN_PATH = re.compile(r'ypp([0-9]+)', re.I)
# Titles without a usable number, mapped to their episode number by hand.
_KNOWN_TITLE_NUMBERS = {
    'Представляюсь и ругаюсь': 1,
    'ЯПП и Будам - Зачем нужны мужчины': 270,
    'ЯПП и Будам - О музыке, птичках и собачках': 525,
    'Ученые записки': 384,
    'ЯПП и Будам - Что такое цивилизованная страна': 534,
    'Записал подкаст Будам, то да се мы обсуждам': -1,  # the file is missing anyway
    'Ученые записки № 2. О семье и браке': 387,
}


def get_number(title, mp3_path):
    """Return the episode number for ``title``.

    Lookup order: a number at the start of the stripped title (after an
    optional known prefix), the phrase 'сорок четыре' anywhere in the title,
    an exact match in the hand-maintained table, and finally a "ypp<number>"
    fragment in ``mp3_path``.

    Args:
        title: episode title; surrounding whitespace is ignored.
        mp3_path: path of the mp3 file, or None/empty when unknown.

    Raises:
        Exception: if none of the lookups yields a number.
    """
    title = title.strip()
    match = _NUMBER_IN_TITLE.match(title)
    if match:
        return int(match.group(2))
    # A separate 'Пьянка\s+#N' check used to follow here; every title it could
    # match is already handled by the pattern above, so it was unreachable.
    if 'сорок четыре' in title:
        return 44
    if title in _KNOWN_TITLE_NUMBERS:
        return _KNOWN_TITLE_NUMBERS[title]
    # mp3_path may be None; re.search(None) would raise TypeError and hide
    # the real "couldn't get number" error.
    if mp3_path:
        match = _NUMBER_IN_PATH.search(mp3_path)
        if match:
            return int(match.group(1))
    raise Exception("couldn't get number from " + title)
class Mp3:
    """Small wrapper around the EasyID3 tag mapping of one mp3 file."""

    def __init__(self, filepath):
        """Load the tags of ``filepath``; start with empty tags if it has no ID3 header."""
        self.filepath = filepath
        try:
            tags = EasyID3(filepath)
        except ID3NoHeaderError:
            tags = EasyID3()
        self.mp3 = tags

    def get(self, tag):
        """Return the value stored under ``tag``, or None when the tag is absent."""
        try:
            value = self.mp3[tag]
        except KeyError:
            value = None
        return value

    def save(self):
        """Write the tags back to ``self.filepath`` and return mutagen's result."""
        # Per mutagen's ID3.save: v1=2 creates/updates the ID3v1 tag,
        # v2_version=3 stores the ID3v2 tag as version 2.3.
        return self.mp3.save(filename=self.filepath, v1=2, v2_version=3)

    def set(self, tag, value):
        """Store ``value`` under ``tag`` (not persisted until save())."""
        self.mp3[tag] = value
def read(filepath, mp3_path=None):
    """Parse a cp1251-encoded notes file into a ``Notes`` object.

    Layout as read here: line 1 is the title, line 2 the date, line 3 is
    skipped, and everything from line 4 on is the note text.

    Args:
        filepath: path of the notes text file.
        mp3_path: optional mp3 path handed to get_number() as a fallback
            source for the episode number.

    Returns:
        The populated ``Notes`` instance.
    """
    # Context manager so the handle is closed even if decoding fails.
    with open(filepath, encoding='cp1251') as notes_file:
        data = notes_file.readlines()
    notes = Notes()
    notes.title = data[0].strip()
    notes.date_from_str(data[1].strip())
    notes.text = ''.join(data[3:]).strip()
    notes.number = get_number(notes.title, mp3_path)
    return notes
def get_file_name(podcast_path):
    """Return the real path of the mp3 file inside ``podcast_path``.

    When the folder holds several ``.mp3`` files, the alphabetically first one
    is returned, so repeated runs pick the same file.

    Raises:
        NotFound: if the folder contains no file ending in ``.mp3``.
    """
    # os.listdir() order is arbitrary; sort it like the other directory walks
    # in this script so the choice is deterministic.
    for filename in sorted(os.listdir(podcast_path)):
        if filename.endswith('.mp3'):
            return path.realpath(path.join(podcast_path, filename))
    raise NotFound('mp3 file not found in ' + podcast_path)
def pprint(mp3):
    """Print a fixed set of tags of ``mp3``, one ``tag: value`` line per tag."""
    shown_tags = ('title', 'artist', 'date', 'description', 'releasedate')
    for name in shown_tags:
        value = mp3.get(name)
        print('{0}: {1}'.format(name, value))
def folder(podcast_path):
    """Tag the mp3 of one podcast folder from its ``text.txt`` and move it out.

    The notes in ``<podcast_path>/text.txt`` supply title, date, description
    and track number. After saving the tags, the file is moved next to the
    podcast folder as ``<YYYY-MM-DD> - <title>.mp3``.

    Args:
        podcast_path: folder containing the mp3 file and ``text.txt``.

    Raises:
        NotFound: propagated from get_file_name() when the folder has no mp3.
    """
    print('process ' + podcast_path)
    mp3_path = get_file_name(podcast_path)
    notes = read(path.join(podcast_path, 'text.txt'), mp3_path)

    stamp = notes.date.strftime('%Y-%m-%d %H:%M:%S')
    mp3 = Mp3(mp3_path)
    mp3.set('title', notes.title)
    mp3.set('artist', 'Янки после пьянки')
    mp3.set('releasedate', stamp)
    mp3.set('date', stamp)
    mp3.set('description', notes.text)
    mp3.set('title3', notes.text)
    mp3.set('tracknumber', str(notes.number))
    mp3.save()
    print(mp3_path)

    # NOTE(review): the title is used verbatim in the file name; titles with
    # characters such as "/" or ":" may need sanitizing - confirm.
    new_name = notes.date.strftime('%Y-%m-%d') + ' - ' + notes.title + '.mp3'
    new_path = os.path.join(os.path.dirname(podcast_path), new_name)
    # The old guard compared the mp3 path with the *folder* path, which is
    # never equal; the meaningful check is against the destination.
    if mp3_path != path.realpath(new_path):
        shutil.move(mp3_path, new_path)
        print('moved to ' + new_path)
def main(root_dir='.'):
    """Tag and rename every podcast folder directly under ``root_dir``.

    A folder named ``<name>_1`` is treated as a duplicate and removed when a
    sibling folder ``<name>`` exists. Every other folder is passed to
    folder(). A folder without an mp3 (NotFound) is counted and reported;
    any other error prints the folder name and is re-raised.

    Args:
        root_dir: directory whose sub-folders are processed.
    """
    print('processing ' + root_dir)
    EasyID3.RegisterTextKey('description', 'TDES')
    EasyID3.RegisterTextKey('title3', 'TIT3')
    EasyID3.RegisterTextKey('releasedate', 'TDRL')
    errors = 0
    deleted = 0
    processed = 0
    for name in sorted(os.listdir(root_dir)):
        filename = os.path.join(root_dir, name)
        if not path.isdir(filename):
            continue
        if filename.endswith('_1') and path.isdir(filename[:-2]):
            shutil.rmtree(filename)
            print('deleted ' + filename)
            deleted += 1
            continue
        try:
            # ``filename`` already includes root_dir; joining it again broke
            # any relative root other than '.'.
            folder(filename)
            processed += 1
        except NotFound as e:
            errors += 1
            print('####### ' + filename + ': ' + str(type(e)) + " " + str(e))
        except Exception:
            print(filename)
            raise  # bare raise keeps the original traceback
    if errors > 0:
        print(f'got {errors} errors')
    if deleted > 0:
        # Used to print the error counter here.
        print(f'deleted {deleted} duplicates')
    if processed > 0:
        print(f'processed {processed} files')
    print('done')
def bitrate(root_dir='.'):
    """Print ``<file name> <bitrate> kbps`` for each mp3 that process() visits under ``root_dir``."""
    def action(filename):
        base = path.basename(filename)
        # mutagen reports bits per second; show whole kilobits.
        kbps = int(MP3(filename).info.bitrate / 1000)
        print(base + ' ' + str(kbps) + ' kbps')

    process(action, root_dir)
def length(root_dir='.'):
    """Print the summed duration of the mp3 files process() visits, as ``hours:minutes:seconds``.

    Args:
        root_dir: directory handed to process().
    """
    def action(filename, context):
        # ``context`` is the running total in seconds.
        return context + MP3(filename).info.length

    total = process(action, root_dir, context=0)
    # Whole seconds only. The previous true division produced float hours and
    # minutes, so the output looked like "1:34.56:56".
    minutes, seconds = divmod(int(total), 60)
    hours, minutes = divmod(minutes, 60)
    print('{}:{}:{}'.format(hours, minutes, seconds))
def clean_filename(root_dir='.'):
    """Report mp3 file names under ``root_dir`` that contain suspicious characters.

    Nothing is renamed; findings are only printed.
    """
    suspicious = r'\/:*?"<>|–'

    def action(filename):
        base = path.basename(filename)
        # First pass: each explicitly listed character gets its own line.
        for ch in suspicious:
            if ch in base:
                print(f'{filename} contains "{ch}"')
        # Second pass: first run of characters outside the allowed set.
        match = re.search(r'[^,()№ё!A-Za-z\d#.А-Яа-я- ]+', base)
        if match:
            print(f'{filename} contains something "{match.group()}"')

    process(action, root_dir)
def process(action, root_dir, context=None):
    """Call ``action`` for every ``.mp3`` file under ``root_dir``, recursively.

    Entries are visited in sorted order; the files of a directory are handled
    before its sub-directories.

    Args:
        action: called as ``action(filename)`` while ``context`` is None,
            otherwise as ``action(filename, context)``, whose return value
            becomes the new context.
        root_dir: directory to walk.
        context: optional accumulator threaded through the calls.

    Returns:
        The final context (None when no context was supplied).
    """
    subdirs = []
    for name in sorted(os.listdir(root_dir)):
        full_path = os.path.join(root_dir, name)
        if path.isdir(full_path):
            subdirs.append(full_path)
        elif path.isfile(full_path) and full_path.endswith('.mp3'):
            if context is not None:
                context = action(full_path, context)
            else:
                action(full_path)
    # The old loop re-ran ``action`` on the last listed entry once per
    # sub-directory instead of descending into the sub-directories.
    for subdir in subdirs:
        context = process(action, subdir, context)
    return context
if __name__ == '__main__':
    # Only the read-only reports run here; the tagging/renaming pass main()
    # is left disabled.
    for report in (bitrate, clean_filename):
        report()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment