Last active
March 6, 2018 10:21
-
-
Save chun37/fc3700b4bc28b878990ea8d9b7078d38 to your computer and use it in GitHub Desktop.
歌ネットから歌詞取得するプログラム
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os
import re

import requests
from bs4 import BeautifulSoup
def createurl(url, ep=""):
    """Return *url* with the endpoint path *ep* appended.

    When *url* ends with a slash and *ep* starts with one, the duplicated
    slash at the join is collapsed so the result contains a single "/"
    there.  In every other case the two strings are simply concatenated;
    no slash is inserted when both sides lack one.

    Args:
        url: Base URL, with or without a trailing slash.
        ep: Endpoint path to append.  Defaults to the empty string, in
            which case *url* is returned unchanged.

    Returns:
        The combined URL string.
    """
    # Collapse the slash at the boundary itself instead of searching for
    # the literal "m//", which only worked for hosts ending in "m".
    if url.endswith("/") and ep.startswith("/"):
        return url + ep[1:]
    return url + ep
# Interactive lookup: read a search keyword, query the uta-net.com search
# page, let the user pick one of the hits, then download that song's lyrics
# and save them to ./lyrics/<first column of the hit>.txt.
# NOTE: relies on the Python 2 built-ins raw_input() and unicode().
songtitle = raw_input(">>>")
p = re.compile(r"<[^>]*?>")  # any markup tag; used to strip tags from serialized nodes
p2 = re.compile(r'[\\|/|:|?|.|"|<|>|\|*|]')  # characters replaced by "-" in the file name
baseurl = 'http://www.uta-net.com'

try:
    search_page = requests.get(
        createurl(baseurl, "/search/"),
        params={"Keyword": songtitle, "Aselect": 2},
    ).text
    tag_tbody = BeautifulSoup(search_page, "lxml").find_all("tbody")
    tag_tr = tag_tbody[0].find_all("tr")
    data = []
    for row in tag_tr:
        td = row.find_all("td")
        # Each entry: [text of column 0, text of column 1, link of column 0]
        # (presumably title, artist and song page href -- confirm against the site).
        data.append([
            p.sub("", unicode(td[0])),
            p.sub("", unicode(td[1])),
            td[0].find("a")["href"],
        ])
except (requests.RequestException, IndexError, KeyError, TypeError):
    # Network failure, no result table, or a row without the expected
    # cells/link.  Named exceptions keep Ctrl-C from being swallowed.
    print("Not Found")
    raise SystemExit

if not data:
    # A result table without rows leaves nothing to choose from.
    print("Not Found")
    raise SystemExit

if len(data) != 1:
    # Several hits: list them with their index and let the user choose.
    for i, x in enumerate(data):
        print("%s: %s - %s" % (i, x[0], x[1]))
    num = raw_input(">>>")
    try:
        num = int(num)
        song = data[num]
    except (ValueError, IndexError):
        print("error")
        raise SystemExit
else:
    song = data[0]

lyrics_page = requests.get(
    createurl(baseurl, "/user/phplib/svg/showkasi.php"),
    # Drops the first six characters and the last one of the href
    # (presumably turning "/song/<id>/" into "<id>" -- confirm).
    params={"ID": song[2][6:-1]},
).text
lyrics_group = BeautifulSoup(lyrics_page, "lxml").find("g")
if lyrics_group is None:
    # The response contained no <g> element to read lyric lines from.
    print("Not Found")
    raise SystemExit

# One lyric line per <text> element, joined without a trailing newline.
text = "\n".join(p.sub("", str(node)) for node in lyrics_group.find_all("text"))
# Serialized nodes keep "&" escaped as an entity; the previous
# replace("&", "&") was a no-op, so decode the entity explicitly.
text = text.replace("&amp;", "&")

if not os.path.isdir("lyrics"):
    os.mkdir("lyrics")
with open("./lyrics/%s.txt" % p2.sub("-", song[0]), "w") as f:
    f.write(text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment