Skip to content

Instantly share code, notes, and snippets.

@KokoseiJ
Last active May 17, 2023 05:16
Show Gist options
  • Save KokoseiJ/ee7afd1456d18c33cf1d1ae9ce925dd6 to your computer and use it in GitHub Desktop.
Save KokoseiJ/ee7afd1456d18c33cf1d1ae9ce925dd6 to your computer and use it in GitHub Desktop.
Dump the UCS song list from the PIU UCS site
import json
import requests
from bs4 import BeautifulSoup as bs
# Listing URL template; the "{}" placeholder is filled with the page number via str.format.
URL = "https://www.piugame.com/piu.ucs/ucs.sample/ucs.sample.alltunes.php?page={}"
# Number of listing pages to fetch (pages 1..PAGES are requested).
# NOTE(review): hard-coded — confirm it still matches the number of pages on the site.
PAGES = 11
def get_page(pagenum, timeout=30):
    """Download one page of the song list and return its HTML text.

    Args:
        pagenum: Page number substituted into the ``{}`` placeholder of ``URL``.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without one a stalled
            connection would block the script indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(URL.format(pagenum), timeout=timeout)
    # Fail loudly here instead of handing an error page to the HTML parser.
    response.raise_for_status()
    return response.text
def get_songlist(pagedata):
    """Extract the song rows from one page of HTML.

    Parses ``pagedata`` with the lxml parser, locates the first ``<table>``
    whose class is ``ucs_down_list`` and returns every ``<tr>`` found inside
    it except the first one.
    """
    document = bs(pagedata, "lxml")
    table = document.find("table", {"class": "ucs_down_list"})
    rows = table.find_all("tr")
    # The first row is skipped — presumably the table header; confirm against the page markup.
    return rows[1:]
def format_songlist_to_json(songs):
    """Convert parsed song rows into JSON-serialisable dictionaries.

    Args:
        songs: Iterable of row elements. Each must offer a
            BeautifulSoup-style ``find(name, attrs)`` whose result has a
            ``text`` attribute.

    Returns:
        A list with one dict per row, holding the keys ``"code"``,
        ``"title"`` and ``"artist"``.
    """

    def span_text(row, css_class):
        # Every field is looked up the same way: a <span> filtered by an
        # explicit class attribute dict (the artist lookup previously passed
        # a bare set and relied on bs4's non-dict attrs fallback).
        return row.find("span", {"class": css_class}).text

    return [
        {
            "code": span_text(song, "download_cs_number"),
            "title": span_text(song, "list_song_title"),
            # The first two characters of the artist text are dropped —
            # presumably a leading separator; confirm against the page markup.
            "artist": span_text(song, "list_song_artist")[2:],
        }
        for song in songs
    ]
# Scrape pages 1..PAGES and flatten the per-page results into a single list.
# A flat comprehension avoids the repeated list copying of sum(lists, []).
data = [
    song
    for pagenum in range(1, PAGES + 1)
    for song in format_songlist_to_json(get_songlist(get_page(pagenum)))
]
# Order numerically by the part of the code that follows its first two characters.
data.sort(key=lambda x: int(x["code"][2:]))
print(json.dumps(data, indent=4))
# ensure_ascii=False writes non-ASCII characters verbatim, so the file is
# opened as UTF-8 explicitly; the platform default encoding may be unable to
# represent them.
with open("cs_db.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment