Created
January 20, 2024 01:34
-
-
Save taroyanaka/df7f4ad17ff01e5c6be2a78b0ce6b7b4 to your computer and use it in GitHub Desktop.
Node.js script that collects the Billboard year-end chart rankings for every year from Wikipedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Code for loading another .js file from the same directory:
// const wiki_bil_file = require('./wiki_bil.js');
// Assign the contents of wiki_bil_file to wiki_bil:
// const wiki_bil = wiki_bil_file['wiki_bil'];
// console.log(wiki_bil[0]);
// Wikipedia pages for the Billboard year-end singles charts, one URL per year,
// in chronological order. The page-title pattern changes over time:
//   1946-1948: "top_singles", 1949-1955: "top_30_singles",
//   1956-1958: "top_50_singles", 1959 onward: "Year-End_Hot_100_singles".
// Every URL ends in its four-digit year.
// NOTE(review): 1950 is absent from this list - confirm whether that is intentional.
const wiki_bil = [
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_singles_of_1946",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_singles_of_1947",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_singles_of_1948",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1949",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1951",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1952",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1953",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1954",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_30_singles_of_1955",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_50_singles_of_1956",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_50_singles_of_1957",
  "https://en.wikipedia.org/wiki/Billboard_year-end_top_50_singles_of_1958",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1959",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1960",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1961",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1962",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1963",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1964",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1965",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1966",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1967",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1968",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1969",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1970",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1971",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1972",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1973",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1974",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1975",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1976",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1977",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1978",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1979",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1980",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1981",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1982",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1983",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1984",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1985",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1986",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1987",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1988",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1989",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1990",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1991",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1992",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1993",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1994",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1995",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1996",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1997",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1998",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_1999",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2000",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2001",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2002",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2003",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2004",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2005",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2006",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2007",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2008",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2009",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2010",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2011",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2012",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2013",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2014",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2015",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2016",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2017",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2018",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2019",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2020",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2021",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2022",
  "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_2023",
];
// Export it:
// module.exports = { wiki_bil };
// Third-party: axios performs the HTTP requests, cheerio parses the returned HTML.
const axios = require('axios');
const cheerio = require('cheerio');
// Promise-based fs API: its functions return promises and take no callbacks.
const fs = require('fs/promises');
// Alternative single-page sources, kept for reference:
// const url = 'https://en.wikipedia.org/wiki/List_of_Billboard_Year-End_number-one_singles_and_albums';
// const url = wiki_bil[50];
// Full list of pages processed by main().
const all_url = wiki_bil;
/**
 * Scrapes one Billboard year-end chart page.
 *
 * Downloads `url`, selects every `.wikitable tbody tr` row on the page and,
 * for each row after the first, reads the title from the second `<td>` and
 * the artist from the third `<td>`, with `[n]` citation markers removed.
 *
 * @param {string} url - Page URL; must end in the four-digit chart year.
 * @returns {Promise<Array<{year: number, rank: number, title: string, artist: string}>>}
 *   One entry per processed row. Rows without title/artist cells yield empty
 *   strings; they are not filtered out here.
 * @throws {Error} If `url` does not end in a four-digit year. Errors raised
 *   while downloading or parsing are logged and rethrown unchanged.
 */
async function scrapeData(url) {
  // The year comes from the URL, not the page, so resolve it once up front
  // and fail with a clear message before doing any network work.
  const yearMatch = url.match(/\d{4}$/);
  if (yearMatch === null) {
    throw new Error(`Cannot determine the chart year from URL: ${url}`);
  }
  const year = Number.parseInt(yearMatch[0], 10);

  // Strips citation markers such as "[12]" first, then trims, so no stray
  // whitespace is left behind where a marker used to be.
  const cleanCell = (cell) => cell.text().replace(/\[\d+\]/g, '').trim();

  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);
    const tableRows = $('.wikitable tbody tr');
    const resultArray = [];

    tableRows.each((index, element) => {
      // The first row holds the column headers (No. / Title / Artist(s)).
      if (index === 0) {
        return;
      }
      const columns = $(element).find('td');
      resultArray.push({
        year,
        // NOTE(review): rank is the row's position among ALL matched rows on
        // the page, not the value of the rank cell; pages with extra rows or
        // more than one wikitable produce ranks past the chart size - confirm
        // whether the rank column should be parsed instead.
        rank: index,
        title: cleanCell(columns.eq(1)),
        artist: cleanCell(columns.eq(2)),
      });
    });

    return resultArray;
  } catch (error) {
    console.error('Error fetching data:', error);
    throw error;
  }
}
/**
 * Scrapes every page in `all_url`, discards incomplete rows and writes the
 * remainder to `./billboard_100.json` as pretty-printed JSON.
 *
 * @returns {Promise<void>} Resolves once the file has been written or the
 *   write failure has been reported.
 * @throws Whatever `scrapeData` throws; a failed page aborts the whole run.
 */
async function main() {
  console.log('Scraping data from Wikipedia...');

  // Pages are fetched strictly one after another, each awaited before the
  // next starts (presumably to avoid hammering Wikipedia - keep it sequential
  // unless that is confirmed unnecessary).
  const data = [];
  for (const url of all_url) {
    const result = await scrapeData(url);
    console.log(url);
    data.push(...result);
  }
  // To process a single URL instead, replace the loop with:
  //   const result = await scrapeData(all_url[0]);
  //   data.push(...result);

  // Drop rows where either the title or the artist is empty. The scraper
  // emits such rows for table lines that are not real chart entries, e.g.
  //   { "year": 2023, "rank": 104,
  //     "title": "\"Rich Flex\"\"Spin Bout U\"\"Creepin'\"\"Peaches & Eggplants\"",
  //     "artist": "" }
  //   { "year": 2023, "rank": 105, "title": "", "artist": "" }
  const data3 = data.filter((item) => item.title !== '' && item.artist !== '');

  // `fs` is the promise API: writeFile must be awaited, and a callback passed
  // to it would never be invoked.
  try {
    await fs.writeFile('./billboard_100.json', JSON.stringify(data3, null, 2));
    console.log('Data successfully written to billboard_100.json');
  } catch (err) {
    console.error('Error writing file:', err);
    // Report the failure through the exit code without killing pending I/O.
    process.exitCode = 1;
  }
}
// Start the scraper. main() returns a promise, so attach a rejection handler
// rather than leaving it floating: a failed page ends the run with a logged
// error and a non-zero exit code.
main().catch((error) => {
  console.error('Scraping failed:', error);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment