# butsugiri/parse_iTunes_XML.py

## parse_iTunes_XML.py
# -*- coding: utf-8 -*-
"""
Parse iTunes Library and Generate Word Cloud
"""
"""
Copy "iTunes Music Library.xml" into the same directory as this Python script
and rename it to "iTunes.xml", which is the file name the parser functions open.
"""
import matplotlib.pyplot as plt
from lxml import etree
from collections import defaultdict
from wordcloud import WordCloud

def parse_XML_by_PlayCount(path="./iTunes.xml"):
    """Sum play counts per artist from an iTunes library XML export.

    Every ``<dict>`` element in the document is read as a sequence of
    ``<key>`` elements, each followed by its value element.  An entry
    contributes to the totals when it has a non-empty ``Artist`` value,
    has no ``Podcast`` key, and its ``Kind`` value does not contain the
    substring ``"app"``.  An entry without a ``Play Count`` key adds 0.

    Args:
        path: Location of the library XML file.  Defaults to
            ``./iTunes.xml``, the file this function originally read.

    Returns:
        A list of ``(artist, total_play_count)`` tuples, one per artist.

    Raises:
        IOError / OSError: if ``path`` cannot be opened.
        ValueError: if a ``Play Count`` value is not an integer literal.
    """
    try:
        to_text = unicode  # Python 2: keep returning unicode artist names
    except NameError:
        to_text = str  # Python 3: str is already the text type

    # Binary mode hands raw bytes to the XML parser, which then decodes
    # them according to the document itself.
    with open(path, "rb") as f:
        document = etree.parse(f)

    totals = defaultdict(int)
    for entry in document.iter("dict"):
        # Rebuild the field table for every <dict> so that nothing parsed
        # for an earlier entry (artist, play count, kind) can be credited
        # to this one.
        fields = {}
        pending_key = None
        for child in entry:
            if child.tag == "key":
                pending_key = child.text
            elif pending_key is not None:
                fields[pending_key] = child.text
                pending_key = None

        if "Podcast" in fields:
            continue
        if "app" in (fields.get("Kind") or ""):
            continue
        artist = fields.get("Artist")
        if not artist:
            continue
        totals[to_text(artist)] += int(fields.get("Play Count") or 0)

    return list(totals.items())

def parse_XML_by_NofSongs(path="./iTunes.xml"):
    """Count library entries per artist from an iTunes library XML export.

    Every ``<dict>`` element in the document is read as a sequence of
    ``<key>`` elements, each followed by its value element.  An entry is
    counted once for its artist when it has a non-empty ``Artist`` value,
    has no ``Podcast`` key, and its ``Kind`` value does not contain the
    substring ``"app"``.

    Only ``<key>`` elements are treated as field names, so a value whose
    text happens to be "Artist", "Kind" or "Podcast" (for example an
    album or song title) is not mistaken for a field name.

    Args:
        path: Location of the library XML file.  Defaults to
            ``./iTunes.xml``, the file this function originally read.

    Returns:
        A list of ``(artist, number_of_entries)`` tuples, one per artist.

    Raises:
        IOError / OSError: if ``path`` cannot be opened.
    """
    try:
        to_text = unicode  # Python 2: keep returning unicode artist names
    except NameError:
        to_text = str  # Python 3: str is already the text type

    # Binary mode hands raw bytes to the XML parser, which then decodes
    # them according to the document itself.
    with open(path, "rb") as f:
        document = etree.parse(f)

    song_counts = defaultdict(int)
    for entry in document.iter("dict"):
        # Pair each <key> with the element that follows it.
        fields = {}
        pending_key = None
        for child in entry:
            if child.tag == "key":
                pending_key = child.text
            elif pending_key is not None:
                fields[pending_key] = child.text
                pending_key = None

        if "Podcast" in fields or "app" in (fields.get("Kind") or ""):
            continue
        artist = fields.get("Artist")
        if artist:
            song_counts[to_text(artist)] += 1

    return list(song_counts.items())

if __name__ == "__main__":
    # Gather both per-artist frequency lists before any drawing happens.
    play_totals = parse_XML_by_PlayCount()
    song_totals = parse_XML_by_NofSongs()

    # Reusing one WordCloud instance for both data sets makes
    # playCountCloud == songsCountCloud evaluate to True (presumably
    # because generate_from_frequencies hands back the instance itself).
    # No workaround is known, so a separate instance is built per cloud.
    font_file = "/System/Library/Fonts/ヒラギノ角ゴシック W5.ttc"
    generators = [
        WordCloud(font_path=font_file, width=800, height=600)
        for _ in range(2)
    ]
    clouds = [
        generator.generate_from_frequencies(frequencies)
        for generator, frequencies in zip(generators, (play_totals, song_totals))
    ]

    # One figure per cloud: first the cloud weighted by play count,
    # then the cloud weighted by number of songs.
    for cloud in clouds:
        axes = plt.figure(dpi=150).add_subplot(111)
        axes.imshow(cloud)
        axes.axis("off")
    plt.show()
	# -- coding: utf-8 --
	"""
	Parse iTunes Library and Generate Word Cloud
	"""
	"""
	Copy "iTunes Music Library.xml" into the same directory as this Python script
	and rename it to "iTunes.xml", which is the file name the parser functions open.
	"""
	import matplotlib.pyplot as plt
	from lxml import etree
	from collections import defaultdict
	from wordcloud import WordCloud

	def parse_XML_by_PlayCount(path="./iTunes.xml"):
	    """Sum play counts per artist from an iTunes library XML export.

	    Every ``<dict>`` element in the document is read as a sequence of
	    ``<key>`` elements, each followed by its value element.  An entry
	    contributes to the totals when it has a non-empty ``Artist`` value,
	    has no ``Podcast`` key, and its ``Kind`` value does not contain the
	    substring ``"app"``.  An entry without a ``Play Count`` key adds 0.

	    Args:
	        path: Location of the library XML file.  Defaults to
	            ``./iTunes.xml``, the file this function originally read.

	    Returns:
	        A list of ``(artist, total_play_count)`` tuples, one per artist.

	    Raises:
	        IOError / OSError: if ``path`` cannot be opened.
	        ValueError: if a ``Play Count`` value is not an integer literal.
	    """
	    try:
	        to_text = unicode  # Python 2: keep returning unicode artist names
	    except NameError:
	        to_text = str  # Python 3: str is already the text type

	    # Binary mode hands raw bytes to the XML parser, which then decodes
	    # them according to the document itself.
	    with open(path, "rb") as f:
	        document = etree.parse(f)

	    totals = defaultdict(int)
	    for entry in document.iter("dict"):
	        # Rebuild the field table for every <dict> so that nothing parsed
	        # for an earlier entry (artist, play count, kind) can be credited
	        # to this one.
	        fields = {}
	        pending_key = None
	        for child in entry:
	            if child.tag == "key":
	                pending_key = child.text
	            elif pending_key is not None:
	                fields[pending_key] = child.text
	                pending_key = None

	        if "Podcast" in fields:
	            continue
	        if "app" in (fields.get("Kind") or ""):
	            continue
	        artist = fields.get("Artist")
	        if not artist:
	            continue
	        totals[to_text(artist)] += int(fields.get("Play Count") or 0)

	    return list(totals.items())

	def parse_XML_by_NofSongs(path="./iTunes.xml"):
	    """Count library entries per artist from an iTunes library XML export.

	    Every ``<dict>`` element in the document is read as a sequence of
	    ``<key>`` elements, each followed by its value element.  An entry is
	    counted once for its artist when it has a non-empty ``Artist`` value,
	    has no ``Podcast`` key, and its ``Kind`` value does not contain the
	    substring ``"app"``.

	    Only ``<key>`` elements are treated as field names, so a value whose
	    text happens to be "Artist", "Kind" or "Podcast" (for example an
	    album or song title) is not mistaken for a field name.

	    Args:
	        path: Location of the library XML file.  Defaults to
	            ``./iTunes.xml``, the file this function originally read.

	    Returns:
	        A list of ``(artist, number_of_entries)`` tuples, one per artist.

	    Raises:
	        IOError / OSError: if ``path`` cannot be opened.
	    """
	    try:
	        to_text = unicode  # Python 2: keep returning unicode artist names
	    except NameError:
	        to_text = str  # Python 3: str is already the text type

	    # Binary mode hands raw bytes to the XML parser, which then decodes
	    # them according to the document itself.
	    with open(path, "rb") as f:
	        document = etree.parse(f)

	    song_counts = defaultdict(int)
	    for entry in document.iter("dict"):
	        # Pair each <key> with the element that follows it.
	        fields = {}
	        pending_key = None
	        for child in entry:
	            if child.tag == "key":
	                pending_key = child.text
	            elif pending_key is not None:
	                fields[pending_key] = child.text
	                pending_key = None

	        if "Podcast" in fields or "app" in (fields.get("Kind") or ""):
	            continue
	        artist = fields.get("Artist")
	        if artist:
	            song_counts[to_text(artist)] += 1

	    return list(song_counts.items())

	if __name__ == "__main__":
	    # Gather both per-artist frequency lists before any drawing happens.
	    playCount = parse_XML_by_PlayCount()
	    songsCount = parse_XML_by_NofSongs()

	    # Reusing one WordCloud instance for both data sets makes
	    # playCountCloud == songsCountCloud evaluate to True (presumably
	    # because generate_from_frequencies hands back the instance itself).
	    # No workaround is known, so a separate instance is built per cloud.
	    fpath = "/System/Library/Fonts/ヒラギノ角ゴシック W5.ttc"
	    wordcloud1 = WordCloud(font_path=fpath, width=800, height=600)
	    wordcloud2 = WordCloud(font_path=fpath, width=800, height=600)
	    playCountCloud = wordcloud1.generate_from_frequencies(playCount)
	    songsCountCloud = wordcloud2.generate_from_frequencies(songsCount)

	    # Show the word cloud weighted by play count.
	    fig1 = plt.figure(dpi=150)
	    ax1 = fig1.add_subplot(111)
	    ax1.imshow(playCountCloud)
	    ax1.axis("off")

	    # Show the word cloud weighted by number of songs.
	    fig2 = plt.figure(dpi=150)
	    ax2 = fig2.add_subplot(111)
	    ax2.imshow(songsCountCloud)
	    ax2.axis("off")
	    plt.show()