Skip to content

Instantly share code, notes, and snippets.

@butsugiri
Last active February 12, 2016 12:18
Show Gist options
  • Save butsugiri/da8c4f719d6e7ba5a6a4 to your computer and use it in GitHub Desktop.
Save butsugiri/da8c4f719d6e7ba5a6a4 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Parse iTunes Library and Generate Word Cloud
"""
"""
Copy "iTunes Music Library.xml" into the directory this script is run from
and rename it to "iTunes.xml", which is the file name the parsers open.
"""
import matplotlib.pyplot as plt
from lxml import etree
from collections import defaultdict
from wordcloud import WordCloud
def parse_XML_by_PlayCount():
    """Sum the play counts found in ``./iTunes.xml`` per artist.

    Every ``<dict>`` element of the document is scanned as a flat sequence of
    ``<key>`` elements, each followed by its value element.  A dict adds its
    "Play Count" value (0 when the key is absent) to the total of its
    "Artist", unless:

    * it contains a "Podcast" key,
    * its "Kind" value contains the substring "app", or
    * it has no "Artist" value.

    Returns:
        list: ``(artist, total_play_count)`` tuples, one per artist.
    """
    try:
        text_type = unicode  # Python 2: coerce artist names to unicode
    except NameError:
        text_type = str  # Python 3: text is already str

    # Binary mode: the XML parser decodes the bytes itself, following the
    # encoding declared in the document.
    with open("./iTunes.xml", "rb") as f:
        dict_nodes = etree.parse(f).xpath("//dict")

    totals = defaultdict(int)
    for node in dict_nodes:
        # All per-entry state is rebuilt for every dict, so an artist or play
        # count read from one entry can never be credited to a later entry
        # that lacks its own "Artist" or "Play Count" key.
        content_kind = ""
        artist_name = None
        play_count = 0
        is_podcast = False
        pending_key = None  # text of the <key> whose value comes next

        for elem in node:
            if elem.tag == "key":
                if elem.text == "Podcast":
                    # Entries carrying a "Podcast" key are ignored entirely.
                    is_podcast = True
                    break
                pending_key = elem.text
                continue

            # Not a <key>: this element is the value of the preceding key.
            if pending_key == "Kind":
                content_kind = elem.text or ""
            elif pending_key == "Play Count":
                play_count = int(elem.text)
            elif pending_key == "Artist" and elem.text:
                # An empty value must not be tallied as the artist "None".
                artist_name = text_type(elem.text)
            pending_key = None

        if is_podcast or "app" in content_kind or not artist_name:
            continue
        totals[artist_name] += play_count

    return list(totals.items())
def parse_XML_by_NofSongs():
    """Count the entries found in ``./iTunes.xml`` per artist.

    Every ``<dict>`` element of the document is scanned as a flat sequence of
    ``<key>`` elements, each followed by its value element.  A dict adds one
    to the count of its "Artist", unless:

    * it contains a "Podcast" key,
    * its "Kind" value contains the substring "app", or
    * it has no "Artist" value.

    Returns:
        list: ``(artist, number_of_entries)`` tuples, one per artist.
    """
    try:
        text_type = unicode  # Python 2: coerce artist names to unicode
    except NameError:
        text_type = str  # Python 3: text is already str

    # Binary mode: the XML parser decodes the bytes itself, following the
    # encoding declared in the document.
    with open("./iTunes.xml", "rb") as f:
        dict_nodes = etree.parse(f).xpath("//dict")

    song_counts = defaultdict(int)
    for node in dict_nodes:
        content_kind = ""
        artist_name = None
        is_podcast = False
        pending_key = None  # text of the <key> whose value comes next

        for elem in node:
            # Only <key> elements are treated as field names.  Matching on the
            # text alone would mistake a value such as a track, album or genre
            # literally called "Podcast", "Artist" or "Kind" for a key.
            if elem.tag == "key":
                if elem.text == "Podcast":
                    # Entries carrying a "Podcast" key are ignored entirely.
                    is_podcast = True
                    break
                pending_key = elem.text
                continue

            # Not a <key>: this element is the value of the preceding key.
            if pending_key == "Artist" and elem.text:
                # An empty value must not be tallied as the artist "None".
                artist_name = text_type(elem.text)
            elif pending_key == "Kind":
                content_kind = elem.text or ""
            pending_key = None

        if is_podcast or "app" in content_kind or not artist_name:
            continue
        song_counts[artist_name] += 1

    return list(song_counts.items())
if __name__ == "__main__":
    # Frequency tables: total plays per artist first, songs per artist second.
    frequency_tables = [parse_XML_by_PlayCount(), parse_XML_by_NofSongs()]

    # Each table gets its own WordCloud instance.  The original author noted
    # that reusing one instance made the two resulting clouds compare equal,
    # and kept separate instances as the workaround.
    font_file = "/System/Library/Fonts/ヒラギノ角ゴシック W5.ttc"
    renderers = [
        WordCloud(font_path=font_file, width=800, height=600)
        for _ in frequency_tables
    ]
    clouds = [
        renderer.generate_from_frequencies(table)
        for renderer, table in zip(renderers, frequency_tables)
    ]

    # One figure per cloud, axes hidden: the play-count cloud is drawn first,
    # then the cloud derived from the number of songs.
    for cloud in clouds:
        axes = plt.figure(dpi=150).add_subplot(111)
        axes.imshow(cloud)
        axes.axis("off")

    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment