sandin/getAlbumCover.py

## getAlbumCover.py
#!/usr/bin/env python
# -*- coding: utf8 -*-
#
# = Requirements =
# eyed3 [http://eyed3.nicfit.net/]
# BeautifulSoup [http://www.crummy.com/software/BeautifulSoup/]
#
# = 使用方法 =
# 将该脚本放置音乐文件夹下，运行 python getAlbumCover.py

# = 说明 =
# 该脚本会遍历该文件夹下所有的mp3文件（包括子目录），然后根据其ID3下载专辑封面, 并将封面图片以 `cover.jpg` 存到该MP3所在目录。
# 假设目录结构：
# --music
# --music/Ben Lee/The Rebirth of Venus
# --music/James Blunt/All The Lost Souls
# 将该脚本放置于music目录下，即可批量下载目录下所有专辑的封面。
import os, urllib, urllib2
from BeautifulSoup import BeautifulSoup
import eyeD3

class DoubanSpider(object):
    """Look up album cover art through the music.douban.com search page."""

    def __init__(self):
        pass

    def searchMusic(self, str):
        """Query the Douban music search and return the result page HTML.

        `str` is the free-text search term.  Text that is not already a
        byte string is encoded as UTF-8 first; the term is then
        percent-encoded before it is substituted into the search URL.
        """
        url = 'http://music.douban.com/subject_search?search_text=%s&cat=1003'
        if not isinstance(str, bytes):
            str = str.encode('utf-8')
        # Percent-encode the term: characters such as '&', '#', '+' or '%'
        # in an album title would otherwise alter the query string.
        f = urllib.urlopen(url % urllib.quote_plus(str))
        try:
            return f.read() # html
        finally:
            # Release the connection even when reading fails.
            f.close()

    def getAlbumCover(self, album, saveAs):
        """Save the cover image of the first search hit for `album`.

        The first <a class="nbg"> element on the result page is used.  For
        every <img> inside it that has a `src`, 'spic' in the URL is
        replaced by 'lpic' (presumably the small and large picture
        variants -- confirm against Douban) and the image is written to
        the path `saveAs`.  Network failures are reported on stdout instead
        of being raised, so that a batch run can carry on with the next
        album.  Returns None.
        """
        try:
            html = self.searchMusic(album)
        except IOError as e:
            print('Search for %s failed: %s' % (album, e))
            return
        soup = BeautifulSoup(html)
        resultItems = soup.find('a', {'class':'nbg'})
        if not resultItems:
            print('Cann\'t find album %s' % album)
            return
        for imgElem in resultItems.findAll('img'):
            src = imgElem.get('src')
            if not src:
                continue
            imageUrl = src.replace('spic', 'lpic')
            print('Got a image: %s' % imageUrl)
            try:
                urllib.urlretrieve(imageUrl, saveAs)
            except IOError as e:
                print('Download of %s failed: %s' % (imageUrl, e))
                continue
            print('Save it on %s' % saveAs)
            print('---------------------')

class DownloadAlbumCover(object):
    """Walk the current directory tree and fetch a cover.jpg per album.

    Instantiating the class does all the work: it scans for MP3 files,
    reads their ID3 album/artist tags and downloads a cover into each
    collected album directory that does not have one yet.
    """

    filenameEncoding = 'GBK' # file system encoding
    # Maps "<album> <artist>" to the directory holding that album.  Kept on
    # the class for backward compatibility only; every instance gets its
    # own dict in __init__ so separate instances never share state.
    needToDownloadList = {}

    def __init__(self):
        self.needToDownloadList = {}
        self.walkDir()
        self.downloadCovers()

    def downloadCovers(self):
        """Download cover.jpg into every collected directory lacking one."""
        for album, albumDir in self.needToDownloadList.items():
            if not os.path.isdir(albumDir):
                continue
            filename = os.path.join(albumDir, 'cover.jpg')
            if os.path.isfile(filename):
                print('already has cover')
                continue
            # The search helper is handed a UTF-8 byte string.
            albumName = album.encode('utf-8')
            print('Search internet for [%s] and try to download cover' % albumName)
            DoubanSpider().getAlbumCover(albumName, filename)

    def walkDir(self):
        """Collect one directory per album from the MP3 files below './'.

        Fills self.needToDownloadList with "<album> <artist>" keys mapped to
        the directory (decoded with filenameEncoding) of the first MP3 file
        found carrying that album/artist pair.
        """
        for top, dirs, files in os.walk('./'):
            for nm in files:
                # Match the extension case-insensitively so '.MP3' counts.
                if not nm.lower().endswith('.mp3'):
                    continue
                path = os.path.join(top, nm)
                tag = eyeD3.Tag()
                try:
                    tag.link(path)
                except eyeD3.tag.TagException:
                    continue # try the next file
                # Guard against a missing tag value before stripping.
                album = (tag.getAlbum() or u'').strip()
                artist = (tag.getArtist() or u'').strip()
                if not album:
                    continue # try the next file
                album += ' ' + artist
                if album not in self.needToDownloadList:
                    self.needToDownloadList[album] = os.path.dirname(path).decode(self.filenameEncoding,'ignore')
                    break # got album, no need to read files under the same dir

# Script entry point: constructing the downloader immediately walks the
# current directory tree and downloads any missing covers.
downloader = DownloadAlbumCover()
	#!/usr/bin/env python
	# -- coding: utf8 --
	#
	# = Requirements =
	# eyed3 [http://eyed3.nicfit.net/]
	# BeautifulSoup [http://www.crummy.com/software/BeautifulSoup/]
	#
	# = 使用方法 =
	# 将该脚本放置音乐文件夹下，运行 python getAlbumCover.py

	# = 说明 =
	# 该脚本会遍历该文件夹下所有的mp3文件（包括子目录），然后根据其ID3下载专辑封面, 并将封面图片以 `cover.jpg` 存到该MP3所在目录。
	# 假设目录结构：
	# --music
	# --music/Ben Lee/The Rebirth of Venus
	# --music/James Blunt/All The Lost Souls
	# 将该脚本放置于music目录下，即可批量下载目录下所有专辑的封面。
	import os, urllib, urllib2
	from BeautifulSoup import BeautifulSoup
	import eyeD3

	class DoubanSpider(object):
	    """Look up album cover art through the music.douban.com search page."""

	    def __init__(self):
	        pass

	    def searchMusic(self, str):
	        """Query the Douban music search and return the result page HTML.

	        `str` is the free-text search term.  Text that is not already a
	        byte string is encoded as UTF-8 first; the term is then
	        percent-encoded before it is substituted into the search URL.
	        """
	        url = 'http://music.douban.com/subject_search?search_text=%s&cat=1003'
	        if not isinstance(str, bytes):
	            str = str.encode('utf-8')
	        # Percent-encode the term: characters such as '&', '#', '+' or '%'
	        # in an album title would otherwise alter the query string.
	        f = urllib.urlopen(url % urllib.quote_plus(str))
	        try:
	            return f.read() # html
	        finally:
	            # Release the connection even when reading fails.
	            f.close()

	    def getAlbumCover(self, album, saveAs):
	        """Save the cover image of the first search hit for `album`.

	        The first <a class="nbg"> element on the result page is used.  For
	        every <img> inside it that has a `src`, 'spic' in the URL is
	        replaced by 'lpic' (presumably the small and large picture
	        variants -- confirm against Douban) and the image is written to
	        the path `saveAs`.  Network failures are reported on stdout instead
	        of being raised, so that a batch run can carry on with the next
	        album.  Returns None.
	        """
	        try:
	            html = self.searchMusic(album)
	        except IOError as e:
	            print('Search for %s failed: %s' % (album, e))
	            return
	        soup = BeautifulSoup(html)
	        resultItems = soup.find('a', {'class':'nbg'})
	        if not resultItems:
	            print('Cann\'t find album %s' % album)
	            return
	        for imgElem in resultItems.findAll('img'):
	            src = imgElem.get('src')
	            if not src:
	                continue
	            imageUrl = src.replace('spic', 'lpic')
	            print('Got a image: %s' % imageUrl)
	            try:
	                urllib.urlretrieve(imageUrl, saveAs)
	            except IOError as e:
	                print('Download of %s failed: %s' % (imageUrl, e))
	                continue
	            print('Save it on %s' % saveAs)
	            print('---------------------')

	class DownloadAlbumCover(object):
	    """Walk the current directory tree and fetch a cover.jpg per album.

	    Instantiating the class does all the work: it scans for MP3 files,
	    reads their ID3 album/artist tags and downloads a cover into each
	    collected album directory that does not have one yet.
	    """

	    filenameEncoding = 'GBK' # file system encoding
	    # Maps "<album> <artist>" to the directory holding that album.  Kept on
	    # the class for backward compatibility only; every instance gets its
	    # own dict in __init__ so separate instances never share state.
	    needToDownloadList = {}

	    def __init__(self):
	        self.needToDownloadList = {}
	        self.walkDir()
	        self.downloadCovers()

	    def downloadCovers(self):
	        """Download cover.jpg into every collected directory lacking one."""
	        for album, albumDir in self.needToDownloadList.items():
	            if not os.path.isdir(albumDir):
	                continue
	            filename = os.path.join(albumDir, 'cover.jpg')
	            if os.path.isfile(filename):
	                print('already has cover')
	                continue
	            # The search helper is handed a UTF-8 byte string.
	            albumName = album.encode('utf-8')
	            print('Search internet for [%s] and try to download cover' % albumName)
	            DoubanSpider().getAlbumCover(albumName, filename)

	    def walkDir(self):
	        """Collect one directory per album from the MP3 files below './'.

	        Fills self.needToDownloadList with "<album> <artist>" keys mapped to
	        the directory (decoded with filenameEncoding) of the first MP3 file
	        found carrying that album/artist pair.
	        """
	        for top, dirs, files in os.walk('./'):
	            for nm in files:
	                # Match the extension case-insensitively so '.MP3' counts.
	                if not nm.lower().endswith('.mp3'):
	                    continue
	                path = os.path.join(top, nm)
	                tag = eyeD3.Tag()
	                try:
	                    tag.link(path)
	                except eyeD3.tag.TagException:
	                    continue # try the next file
	                # Guard against a missing tag value before stripping.
	                album = (tag.getAlbum() or u'').strip()
	                artist = (tag.getArtist() or u'').strip()
	                if not album:
	                    continue # try the next file
	                album += ' ' + artist
	                if album not in self.needToDownloadList:
	                    self.needToDownloadList[album] = os.path.dirname(path).decode(self.filenameEncoding,'ignore')
	                    break # got album, no need to read files under the same dir

	# Script entry point: constructing the downloader immediately walks the
	# current directory tree and downloads any missing covers.
	downloader = DownloadAlbumCover()