Skip to content

Instantly share code, notes, and snippets.

@sandin
Created December 28, 2011 14:25
Show Gist options
  • Save sandin/1528109 to your computer and use it in GitHub Desktop.
Save sandin/1528109 to your computer and use it in GitHub Desktop.
download album cover
#!/usr/bin/env python
# -*- coding: utf8 -*-
#
# = Requirements =
# eyed3 [http://eyed3.nicfit.net/]
# BeautifulSoup [http://www.crummy.com/software/BeautifulSoup/]
#
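# = Usage =
# Place this script in your music folder and run: python getAlbumCover.py
# = Notes =
# The script walks every mp3 file under that folder (including subdirectories), downloads the album cover based on each file's ID3 tag, and saves the image as `cover.jpg` in the directory that contains the MP3.
# Example directory layout:
# --music
# --music/Ben Lee/The Rebirth of Venus
# --music/James Blunt/All The Lost Souls
# Put this script in the music directory and it will batch-download the covers of all albums under it.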
import os, urllib, urllib2
from BeautifulSoup import BeautifulSoup
import eyeD3
class DoubanSpider(object):
    """Scrape music.douban.com search results to fetch an album cover image."""

    # Search URL template; the single ``%s`` receives the escaped search text.
    SEARCH_URL = 'http://music.douban.com/subject_search?search_text=%s&cat=1003'

    def __init__(self):
        pass

    @staticmethod
    def _largeCoverUrl(src):
        """Return ``src`` with every ``'spic'`` replaced by ``'lpic'``."""
        # presumably 'spic'/'lpic' are Douban's small/large image paths -- verify
        return src.replace('spic', 'lpic')

    def searchMusic(self, str):
        """Fetch the search-result page for the given search text.

        ``str`` is the search text; the parameter name shadows the builtin but
        is kept so that existing keyword callers keep working.  The text is
        percent-encoded before being placed in the query string so that
        characters such as ``&``, ``#``, ``+`` or ``%`` in an album title
        cannot change the structure of the URL.

        Returns the response body (HTML) as returned by ``read()``.
        """
        query = urllib.quote_plus(str)
        response = urllib.urlopen(self.SEARCH_URL % query)
        try:
            return response.read()  # html
        finally:
            # Always release the connection, even if read() fails.
            response.close()

    def getAlbumCover(self, album, saveAs):
        """Search for ``album`` and save the cover image to the path ``saveAs``.

        Only the first ``<a class="nbg">`` element of the result page is
        inspected.  Every ``<img>`` inside it that has a ``src`` is downloaded
        to ``saveAs`` (so if there were several, the last one would win).
        Progress is reported on stdout.  Returns ``None``; when no matching
        link is found a message is printed and nothing is downloaded.
        """
        html = self.searchMusic(album)
        soup = BeautifulSoup(html)
        resultItems = soup.find('a', {'class': 'nbg'})
        if not resultItems:
            print('Cann\'t find album %s' % album)
            return
        for imgElem in resultItems.findAll('img'):
            src = imgElem.get('src')
            if not src:
                continue
            imageUrl = self._largeCoverUrl(src)
            print('Got a image: %s' % imageUrl)
            urllib.urlretrieve(imageUrl, saveAs)
            print('Save it on %s' % saveAs)
            print('---------------------')
class DownloadAlbumCover(object):
    """Find albums under the current directory and download a cover for each.

    Constructing an instance does all the work: it walks ``./`` collecting
    albums from MP3 tags, then downloads a ``cover.jpg`` into each album's
    directory unless one is already there.
    """

    filenameEncoding = 'GBK'  # file system encoding

    # Class-level default kept for backward compatibility only; __init__ gives
    # every instance its own dict so instances never share state.
    needToDownloadList = {}

    def __init__(self):
        # Maps "<album> <artist>" -> directory (decoded with filenameEncoding).
        self.needToDownloadList = {}
        self.walkDir()
        self.downloadCovers()

    def downloadCovers(self):
        """Download ``cover.jpg`` for every collected album that lacks one.

        Entries whose directory does not exist are skipped silently, and
        directories that already contain ``cover.jpg`` are left alone.  An
        ``IOError`` raised while fetching one album is reported on stdout and
        the loop moves on, so a single network failure does not abort the
        remaining albums.
        """
        for album, albumDir in self.needToDownloadList.items():
            albumName = album.encode('utf-8')
            #albumDir = albumDir.encode(self.filenameEncoding)
            if not os.path.isdir(albumDir):
                continue
            filename = os.path.join(albumDir, 'cover.jpg')
            if os.path.isfile(filename):
                print('already has cover')
                continue
            print('Search internet for [%s] and try to download cover' % albumName)
            douban = DoubanSpider()
            # only for debug
            #filename = './cover/%s.jpg' % str(hash(filename[2:]))
            try:
                douban.getAlbumCover(albumName, filename)
            except IOError as err:
                print('Failed to download cover for [%s]: %s' % (albumName, err))

    def walkDir(self):
        """Walk ``./`` and record one "<album> <artist>" entry per directory.

        For each directory, files ending in ``.mp3`` (any letter case) are
        examined in turn until one yields a non-empty album tag; that album is
        recorded (unless the same key is already present) and the rest of the
        directory is skipped.  Files whose tag cannot be linked, or whose
        album tag is empty, are passed over.
        """
        for top, dirs, files in os.walk('./'):
            for nm in files:
                path = os.path.join(top, nm)
                # Case-insensitive so that e.g. "SONG.MP3" is also picked up.
                if not path.lower().endswith('.mp3'):
                    continue
                tag = eyeD3.Tag()
                try:
                    tag.link(path)
                except eyeD3.tag.TagException:
                    continue  # try the next file
                album = tag.getAlbum().strip()
                artist = tag.getArtist().strip()
                if not album:
                    continue  # try the next file
                album += ' ' + artist
                if album not in self.needToDownloadList:
                    # Characters that cannot be decoded are dropped ('ignore').
                    self.needToDownloadList[album] = os.path.dirname(path).decode(self.filenameEncoding, 'ignore')
                # NOTE(review): the original indentation was lost; this break is
                # placed per its comment (stop after the first album found in a
                # directory, whether or not it was newly added) -- confirm.
                break  # got album, no need to read files under the same dir
# Run the batch download only when executed as a script, so that importing
# this module does not walk the current directory or touch the network.
if __name__ == '__main__':
    downloader = DownloadAlbumCover()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment